loopguard-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. loopguard_ai-0.1.0/.github/workflows/ci.yml +63 -0
  2. loopguard_ai-0.1.0/.github/workflows/publish.yml +28 -0
  3. loopguard_ai-0.1.0/.gitignore +17 -0
  4. loopguard_ai-0.1.0/LICENSE +21 -0
  5. loopguard_ai-0.1.0/PKG-INFO +234 -0
  6. loopguard_ai-0.1.0/README.md +197 -0
  7. loopguard_ai-0.1.0/blog/loop-guard-announcement.md +195 -0
  8. loopguard_ai-0.1.0/examples/adk_demo.py +83 -0
  9. loopguard_ai-0.1.0/examples/autoresearch_demo.py +107 -0
  10. loopguard_ai-0.1.0/examples/autoresearch_karpathy_analysis.py +199 -0
  11. loopguard_ai-0.1.0/examples/autoresearch_real_demo.py +102 -0
  12. loopguard_ai-0.1.0/examples/openai_sdk_demo.py +96 -0
  13. loopguard_ai-0.1.0/experiments/anthropic_real_demo.py +651 -0
  14. loopguard_ai-0.1.0/experiments/gemini_real_demo.py +176 -0
  15. loopguard_ai-0.1.0/experiments/openai_real_demo.py +319 -0
  16. loopguard_ai-0.1.0/experiments/real_training_loop.py +355 -0
  17. loopguard_ai-0.1.0/loop_guard/__init__.py +23 -0
  18. loopguard_ai-0.1.0/loop_guard/cli.py +388 -0
  19. loopguard_ai-0.1.0/loop_guard/engine.py +78 -0
  20. loopguard_ai-0.1.0/loop_guard/extractor.py +237 -0
  21. loopguard_ai-0.1.0/loop_guard/guard.py +80 -0
  22. loopguard_ai-0.1.0/loop_guard/integrations/__init__.py +1 -0
  23. loopguard_ai-0.1.0/loop_guard/integrations/anthropic_sdk.py +283 -0
  24. loopguard_ai-0.1.0/loop_guard/integrations/autoresearch.py +411 -0
  25. loopguard_ai-0.1.0/loop_guard/integrations/google_adk.py +374 -0
  26. loopguard_ai-0.1.0/loop_guard/integrations/openai_agents.py +478 -0
  27. loopguard_ai-0.1.0/loop_guard/models.py +93 -0
  28. loopguard_ai-0.1.0/loop_guard/provenance.py +238 -0
  29. loopguard_ai-0.1.0/loop_guard/reporter.py +143 -0
  30. loopguard_ai-0.1.0/loop_guard/verifiers/__init__.py +17 -0
  31. loopguard_ai-0.1.0/loop_guard/verifiers/citation.py +188 -0
  32. loopguard_ai-0.1.0/loop_guard/verifiers/code_output.py +163 -0
  33. loopguard_ai-0.1.0/loop_guard/verifiers/loop_trap.py +70 -0
  34. loopguard_ai-0.1.0/loop_guard/verifiers/metric.py +163 -0
  35. loopguard_ai-0.1.0/loop_guard/verifiers/regression.py +79 -0
  36. loopguard_ai-0.1.0/loop_guard/verifiers/statistical.py +183 -0
  37. loopguard_ai-0.1.0/loop_guard/verifiers/tool_output.py +312 -0
  38. loopguard_ai-0.1.0/pyproject.toml +74 -0
  39. loopguard_ai-0.1.0/real_experiment_results.tsv +16 -0
  40. loopguard_ai-0.1.0/spec.md +901 -0
  41. loopguard_ai-0.1.0/tests/__init__.py +0 -0
  42. loopguard_ai-0.1.0/tests/test_autoresearch.py +121 -0
  43. loopguard_ai-0.1.0/tests/test_citation.py +104 -0
  44. loopguard_ai-0.1.0/tests/test_extractor.py +104 -0
  45. loopguard_ai-0.1.0/tests/test_integration.py +156 -0
  46. loopguard_ai-0.1.0/tests/test_loop_trap.py +82 -0
  47. loopguard_ai-0.1.0/tests/test_provenance.py +128 -0
  48. loopguard_ai-0.1.0/tests/test_regression.py +118 -0
  49. loopguard_ai-0.1.0/tests/test_statistical.py +101 -0
  50. loopguard_ai-0.1.0/tests/test_tool_output.py +130 -0
@@ -0,0 +1,63 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[dev]"
28
+
29
+ - name: Lint with ruff
30
+ run: |
31
+ ruff check loop_guard/ tests/
32
+
33
+ - name: Run tests
34
+ run: |
35
+ pytest tests/ -v --tb=short
36
+
37
+ - name: Check import
38
+ run: |
39
+ python -c "from loop_guard import LoopGuard, __version__; print(f'loop-guard v{__version__}')"
40
+
41
+ publish:
42
+ needs: test
43
+ runs-on: ubuntu-latest
44
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
45
+ permissions:
46
+ id-token: write
47
+ steps:
48
+ - uses: actions/checkout@v4
49
+
50
+ - name: Set up Python
51
+ uses: actions/setup-python@v5
52
+ with:
53
+ python-version: "3.12"
54
+
55
+ - name: Install build tools
56
+ run: |
57
+ python -m pip install --upgrade pip build
58
+
59
+ - name: Build package
60
+ run: python -m build
61
+
62
+ - name: Publish to PyPI
63
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,28 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+
21
+ - name: Install build tools
22
+ run: python -m pip install --upgrade pip build
23
+
24
+ - name: Build package
25
+ run: python -m build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,17 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .pytest_cache/
10
+ .hypothesis/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ *.html
14
+ *.json
15
+ !pyproject.toml
16
+ .omc/
17
+ .env
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LoopGuard Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,234 @@
1
+ Metadata-Version: 2.4
2
+ Name: loopguard-ai
3
+ Version: 0.1.0
4
+ Summary: Deterministic verification for autonomous agent loops
5
+ Project-URL: Homepage, https://github.com/ybkim95/loop-guard
6
+ Project-URL: Repository, https://github.com/ybkim95/loop-guard
7
+ Project-URL: Issues, https://github.com/ybkim95/loop-guard/issues
8
+ Author: LoopGuard Contributors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,autonomous,deterministic,llm,testing,verification
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Quality Assurance
22
+ Classifier: Topic :: Software Development :: Testing
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: httpx>=0.27.0
25
+ Provides-Extra: all
26
+ Requires-Dist: loopguard-ai[dev,llm]; extra == 'all'
27
+ Provides-Extra: dev
28
+ Requires-Dist: mypy>=1.13; extra == 'dev'
29
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
31
+ Requires-Dist: pytest>=8.0; extra == 'dev'
32
+ Requires-Dist: respx>=0.22.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
34
+ Provides-Extra: llm
35
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # loop-guard
39
+
40
+ **Deterministic verification for autonomous agent loops.**
41
+
42
+ loop-guard catches silent errors in agent loops by re-running code, looking up citations, and checking statistics — not by asking another LLM if the output "looks right."
43
+
44
+ [![PyPI](https://img.shields.io/pypi/v/loopguard-ai)](https://pypi.org/project/loopguard-ai/)
45
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://python.org)
46
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
47
+
48
+ ## The Problem
49
+
50
+ Autonomous agent loops run for hours without human oversight. Agents make intermediate claims — "accuracy is 94%", "tests pass", "p < 0.05" — that compound over hundreds of steps. A wrong claim at step 23 becomes the premise for steps 24–100. Nobody catches the error until a human reviews the final output, if at all.
51
+
52
+ ## How loop-guard Works
53
+
54
+ ```
55
+ Agent Loop → Integration Layer → Claim Extractor → Verification Engine → Reporter
56
+ (regex-first) (3 layers) (terminal/JSON/HTML)
57
+ ```
58
+
59
+ **Three verification layers, in order of reliability:**
60
+
61
+ | Layer | Method | Reliability | Example |
62
+ |-------|--------|-------------|---------|
63
+ | **L1: Deterministic** | Re-execute code, API lookup, re-compute | Cannot be wrong | Citation lookup, code re-run |
64
+ | **L2: Rule-based** | Pattern matching, sanity checks | Rarely wrong | p > 1 detection, loop trap |
65
+ | **L3: LLM-assisted** | Soft flagging only | May be wrong | General claim flagging |
66
+
67
+ **Key principle:** Verification must be more reliable than the thing being verified. LLMs are used only for claim extraction (a structured task), never for judgment.
68
+
69
+ ## Install
70
+
71
+ ```bash
72
+ pip install loopguard-ai
73
+
74
+ # With LLM-based claim extraction (optional)
75
+ pip install "loopguard-ai[llm]"
76
+ ```
77
+
78
+ ## Quick Start
79
+
80
+ ### Python API (2 lines to integrate)
81
+
82
+ ```python
83
+ from loop_guard import LoopGuard
84
+
85
+ guard = LoopGuard()
86
+
87
+ # Works with ANY agent loop — OpenAI, Anthropic, Google ADK, LangGraph, custom
88
+ for i in range(num_experiments):
89
+ result = agent.run(task)
90
+ findings = guard.step(
91
+ output=result.text,
92
+ code=result.code_executed, # optional
93
+ files=result.files_modified, # optional
94
+ )
95
+ for f in findings:
96
+ print(f) # FAIL/WARN/FLAG with explanation
97
+
98
+ # Generate report
99
+ guard.report(format="html", path="audit.html")
100
+ ```
101
+
102
+ ### CLI (zero code change)
103
+
104
+ ```bash
105
+ # Pipe any agent's stdout
106
+ python my_agent.py 2>&1 | loop-guard watch
107
+
108
+ # Watch a log file
109
+ loop-guard watch --file agent.log --follow
110
+
111
+ # Watch git commits (for autoresearch)
112
+ loop-guard watch --git-dir ./experiments/ --poll 30
113
+
114
+ # Check a transcript
115
+ loop-guard check --input transcript.txt --format html
116
+ ```
117
+
118
+ ## What Gets Verified
119
+
120
+ ### Verifiers
121
+
122
+ | Verifier | Layer | What it catches |
123
+ |----------|-------|----------------|
124
+ | **LoopTrapVerifier** | L2 | Agent stuck retrying the same failing approach |
125
+ | **RegressionVerifier** | L2 | Agent reverts a file to a previous version |
126
+ | **CitationVerifier** | L1 | Hallucinated academic citations (CrossRef + Semantic Scholar) |
127
+ | **StatisticalVerifier** | L2 | Impossible p-values, missing multiple comparison correction, small samples |
128
+ | **CodeOutputVerifier** | L1 | Agent claims code produced output X, but re-execution produces Y |
129
+ | **MetricVerifier** | L1 | Agent claims metric = X, but re-computation gives Y |
130
+
131
+ ### Claim Extraction
132
+
133
+ Claims are extracted from agent output using a **regex-first pipeline**:
134
+
135
+ 1. **Regex patterns** catch citations, metrics, p-values, test results, and file modifications
136
+ 2. **LLM extraction** (optional) handles remaining unstructured text
137
+ 3. Claims are typed: `CODE_OUTPUT`, `METRIC`, `STATISTICAL`, `CITATION`, `TEST_RESULT`, `FILE_STATE`, `GENERAL`
138
+
139
+ ## Output
140
+
141
+ ### Terminal (real-time)
142
+
143
+ ```
144
+ [loop-guard] Step 4 [FAIL] [L2] Impossible statistical value: accuracy = 105.3% (> 100%)
145
+ Expected: accuracy ∈ [0%, 100%]
146
+ Actual: 105.3%
147
+ [loop-guard] Step 3 [WARN] [L1] Citation not found in CrossRef or Semantic Scholar
148
+ Expected: Fakenstein et al. 2025
149
+ Actual: No matching paper found
150
+ [loop-guard] Step 7 [WARN] [L2] Agent appears stuck: 3 consecutive similar outputs
151
+ ```
152
+
153
+ ### JSON Report
154
+
155
+ ```bash
156
+ loop-guard check --input transcript.txt --output report.json --format json
157
+ ```
158
+
159
+ ### HTML Report
160
+
161
+ ```bash
162
+ loop-guard check --input transcript.txt --output report.html --format html
163
+ ```
164
+
165
+ Produces a styled, shareable HTML report with findings grouped by step, color-coded by severity.
166
+
167
+ ## Configuration
168
+
169
+ ```python
170
+ guard = LoopGuard(config={
171
+ # Claim extraction
172
+ "use_llm_extraction": True, # Enable LLM fallback extraction
173
+ "extraction_model": "claude-haiku-4-5-20251001", # Model for extraction
174
+
175
+ # Verification
176
+ "sandbox_dir": "/tmp/loopguard_sandbox", # Code execution sandbox
177
+ "timeout": 60, # Sandbox timeout (seconds)
178
+
179
+ # Loop trap detection
180
+ "similarity_threshold": 0.8, # Output similarity threshold
181
+ "consecutive_limit": 3, # Consecutive similar outputs to trigger
182
+
183
+ # Reporting
184
+ "verbosity": "findings_only", # all | findings_only | failures_only
185
+ })
186
+ ```
187
+
188
+ ## Architecture
189
+
190
+ ```
191
+ loop_guard/
192
+ ├── __init__.py # Public API
193
+ ├── models.py # ClaimType, Verdict, Finding, etc.
194
+ ├── extractor.py # Regex-first claim extraction
195
+ ├── engine.py # Verification routing engine
196
+ ├── reporter.py # Terminal, JSON, HTML output
197
+ ├── guard.py # LoopGuard (main entry point)
198
+ ├── cli.py # CLI (loop-guard watch/check/report)
199
+ └── verifiers/
200
+ ├── loop_trap.py # Stuck loop detection
201
+ ├── regression.py # File regression detection
202
+ ├── citation.py # CrossRef + Semantic Scholar lookup
203
+ ├── statistical.py # Statistical sanity checks
204
+ ├── code_output.py # Code re-execution
205
+ └── metric.py # Metric re-computation
206
+ ```
207
+
208
+ ## What loop-guard Is NOT
209
+
210
+ - **NOT an LLM-as-judge system.** Those share the same failure modes as the agent being judged.
211
+ - **NOT a prompt injection detector.** Use dedicated security tools for that.
212
+ - **NOT a post-hoc evaluation tool.** It runs in-loop, catching errors as they happen.
213
+ - **NOT a replacement for human review.** It flags issues for humans to investigate.
214
+
215
+ ## Examples
216
+
217
+ See the [`examples/`](examples/) directory:
218
+
219
+ - `autoresearch_demo.py` — ML experiment loop with metric/citation/loop-trap verification
220
+ - `openai_sdk_demo.py` — Coding agent integration pattern
221
+ - `adk_demo.py` — Data analysis agent with statistical verification
222
+
223
+ ## Contributing
224
+
225
+ ```bash
226
+ git clone https://github.com/ybkim95/loop-guard
227
+ cd loop-guard
228
+ pip install -e ".[dev]"
229
+ pytest
230
+ ```
231
+
232
+ ## License
233
+
234
+ MIT
@@ -0,0 +1,197 @@
1
+ # loop-guard
2
+
3
+ **Deterministic verification for autonomous agent loops.**
4
+
5
+ loop-guard catches silent errors in agent loops by re-running code, looking up citations, and checking statistics — not by asking another LLM if the output "looks right."
6
+
7
+ [![PyPI](https://img.shields.io/pypi/v/loopguard-ai)](https://pypi.org/project/loopguard-ai/)
8
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://python.org)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
10
+
11
+ ## The Problem
12
+
13
+ Autonomous agent loops run for hours without human oversight. Agents make intermediate claims — "accuracy is 94%", "tests pass", "p < 0.05" — that compound over hundreds of steps. A wrong claim at step 23 becomes the premise for steps 24–100. Nobody catches the error until a human reviews the final output, if at all.
14
+
15
+ ## How loop-guard Works
16
+
17
+ ```
18
+ Agent Loop → Integration Layer → Claim Extractor → Verification Engine → Reporter
19
+ (regex-first) (3 layers) (terminal/JSON/HTML)
20
+ ```
21
+
22
+ **Three verification layers, in order of reliability:**
23
+
24
+ | Layer | Method | Reliability | Example |
25
+ |-------|--------|-------------|---------|
26
+ | **L1: Deterministic** | Re-execute code, API lookup, re-compute | Cannot be wrong | Citation lookup, code re-run |
27
+ | **L2: Rule-based** | Pattern matching, sanity checks | Rarely wrong | p > 1 detection, loop trap |
28
+ | **L3: LLM-assisted** | Soft flagging only | May be wrong | General claim flagging |
29
+
30
+ **Key principle:** Verification must be more reliable than the thing being verified. LLMs are used only for claim extraction (a structured task), never for judgment.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install loopguard-ai
36
+
37
+ # With LLM-based claim extraction (optional)
38
+ pip install "loopguard-ai[llm]"
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ### Python API (2 lines to integrate)
44
+
45
+ ```python
46
+ from loop_guard import LoopGuard
47
+
48
+ guard = LoopGuard()
49
+
50
+ # Works with ANY agent loop — OpenAI, Anthropic, Google ADK, LangGraph, custom
51
+ for i in range(num_experiments):
52
+ result = agent.run(task)
53
+ findings = guard.step(
54
+ output=result.text,
55
+ code=result.code_executed, # optional
56
+ files=result.files_modified, # optional
57
+ )
58
+ for f in findings:
59
+ print(f) # FAIL/WARN/FLAG with explanation
60
+
61
+ # Generate report
62
+ guard.report(format="html", path="audit.html")
63
+ ```
64
+
65
+ ### CLI (zero code change)
66
+
67
+ ```bash
68
+ # Pipe any agent's stdout
69
+ python my_agent.py 2>&1 | loop-guard watch
70
+
71
+ # Watch a log file
72
+ loop-guard watch --file agent.log --follow
73
+
74
+ # Watch git commits (for autoresearch)
75
+ loop-guard watch --git-dir ./experiments/ --poll 30
76
+
77
+ # Check a transcript
78
+ loop-guard check --input transcript.txt --format html
79
+ ```
80
+
81
+ ## What Gets Verified
82
+
83
+ ### Verifiers
84
+
85
+ | Verifier | Layer | What it catches |
86
+ |----------|-------|----------------|
87
+ | **LoopTrapVerifier** | L2 | Agent stuck retrying the same failing approach |
88
+ | **RegressionVerifier** | L2 | Agent reverts a file to a previous version |
89
+ | **CitationVerifier** | L1 | Hallucinated academic citations (CrossRef + Semantic Scholar) |
90
+ | **StatisticalVerifier** | L2 | Impossible p-values, missing multiple comparison correction, small samples |
91
+ | **CodeOutputVerifier** | L1 | Agent claims code produced output X, but re-execution produces Y |
92
+ | **MetricVerifier** | L1 | Agent claims metric = X, but re-computation gives Y |
93
+
94
+ ### Claim Extraction
95
+
96
+ Claims are extracted from agent output using a **regex-first pipeline**:
97
+
98
+ 1. **Regex patterns** catch citations, metrics, p-values, test results, and file modifications
99
+ 2. **LLM extraction** (optional) handles remaining unstructured text
100
+ 3. Claims are typed: `CODE_OUTPUT`, `METRIC`, `STATISTICAL`, `CITATION`, `TEST_RESULT`, `FILE_STATE`, `GENERAL`
101
+
102
+ ## Output
103
+
104
+ ### Terminal (real-time)
105
+
106
+ ```
107
+ [loop-guard] Step 4 [FAIL] [L2] Impossible statistical value: accuracy = 105.3% (> 100%)
108
+ Expected: accuracy ∈ [0%, 100%]
109
+ Actual: 105.3%
110
+ [loop-guard] Step 3 [WARN] [L1] Citation not found in CrossRef or Semantic Scholar
111
+ Expected: Fakenstein et al. 2025
112
+ Actual: No matching paper found
113
+ [loop-guard] Step 7 [WARN] [L2] Agent appears stuck: 3 consecutive similar outputs
114
+ ```
115
+
116
+ ### JSON Report
117
+
118
+ ```bash
119
+ loop-guard check --input transcript.txt --output report.json --format json
120
+ ```
121
+
122
+ ### HTML Report
123
+
124
+ ```bash
125
+ loop-guard check --input transcript.txt --output report.html --format html
126
+ ```
127
+
128
+ Produces a styled, shareable HTML report with findings grouped by step, color-coded by severity.
129
+
130
+ ## Configuration
131
+
132
+ ```python
133
+ guard = LoopGuard(config={
134
+ # Claim extraction
135
+ "use_llm_extraction": True, # Enable LLM fallback extraction
136
+ "extraction_model": "claude-haiku-4-5-20251001", # Model for extraction
137
+
138
+ # Verification
139
+ "sandbox_dir": "/tmp/loopguard_sandbox", # Code execution sandbox
140
+ "timeout": 60, # Sandbox timeout (seconds)
141
+
142
+ # Loop trap detection
143
+ "similarity_threshold": 0.8, # Output similarity threshold
144
+ "consecutive_limit": 3, # Consecutive similar outputs to trigger
145
+
146
+ # Reporting
147
+ "verbosity": "findings_only", # all | findings_only | failures_only
148
+ })
149
+ ```
150
+
151
+ ## Architecture
152
+
153
+ ```
154
+ loop_guard/
155
+ ├── __init__.py # Public API
156
+ ├── models.py # ClaimType, Verdict, Finding, etc.
157
+ ├── extractor.py # Regex-first claim extraction
158
+ ├── engine.py # Verification routing engine
159
+ ├── reporter.py # Terminal, JSON, HTML output
160
+ ├── guard.py # LoopGuard (main entry point)
161
+ ├── cli.py # CLI (loop-guard watch/check/report)
162
+ └── verifiers/
163
+ ├── loop_trap.py # Stuck loop detection
164
+ ├── regression.py # File regression detection
165
+ ├── citation.py # CrossRef + Semantic Scholar lookup
166
+ ├── statistical.py # Statistical sanity checks
167
+ ├── code_output.py # Code re-execution
168
+ └── metric.py # Metric re-computation
169
+ ```
170
+
171
+ ## What loop-guard Is NOT
172
+
173
+ - **NOT an LLM-as-judge system.** Those share the same failure modes as the agent being judged.
174
+ - **NOT a prompt injection detector.** Use dedicated security tools for that.
175
+ - **NOT a post-hoc evaluation tool.** It runs in-loop, catching errors as they happen.
176
+ - **NOT a replacement for human review.** It flags issues for humans to investigate.
177
+
178
+ ## Examples
179
+
180
+ See the [`examples/`](examples/) directory:
181
+
182
+ - `autoresearch_demo.py` — ML experiment loop with metric/citation/loop-trap verification
183
+ - `openai_sdk_demo.py` — Coding agent integration pattern
184
+ - `adk_demo.py` — Data analysis agent with statistical verification
185
+
186
+ ## Contributing
187
+
188
+ ```bash
189
+ git clone https://github.com/ybkim95/loop-guard
190
+ cd loop-guard
191
+ pip install -e ".[dev]"
192
+ pytest
193
+ ```
194
+
195
+ ## License
196
+
197
+ MIT