lyingdocs 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyingdocs-0.1.4/PKG-INFO +275 -0
- lyingdocs-0.1.4/README.md +259 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/argus.py +0 -7
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/argus_claude_code.py +16 -4
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/argus_local.py +5 -2
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/cli.py +90 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/config.py +35 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/hermes_agent.py +14 -6
- lyingdocs-0.1.4/lyingdocs/init_ci.py +357 -0
- lyingdocs-0.1.4/lyingdocs/llm.py +341 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/prompts/agent_system.txt +24 -13
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/prompts/argus_local_system.txt +9 -3
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/pyproject.toml +2 -1
- lyingdocs-0.1.2/PKG-INFO +0 -269
- lyingdocs-0.1.2/README.md +0 -254
- lyingdocs-0.1.2/lyingdocs/llm.py +0 -94
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/.gitignore +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/LICENSE +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/__init__.py +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/__main__.py +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/codex.py +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/doctree.py +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/prompts/codex_task.txt +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/prompts/issue_generator.txt +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/prompts/report_synthesis.txt +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/tools.py +0 -0
- {lyingdocs-0.1.2 → lyingdocs-0.1.4}/lyingdocs/workspace.py +0 -0
lyingdocs-0.1.4/PKG-INFO
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lyingdocs
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Autonomous documentation-code misalignment detection using LLM agents
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: anthropic>=0.40
|
|
9
|
+
Requires-Dist: openai>=1.0
|
|
10
|
+
Requires-Dist: python-dotenv>=1.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: build; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
14
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
<p align="center">
|
|
18
|
+
<img src="assets/logo.png" alt="LyingDocs" width="200" />
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
<h1 align="center">LyingDocs</h1>
|
|
22
|
+
|
|
23
|
+
<p align="center">
|
|
24
|
+
A trust layer for your repository.
|
|
25
|
+
</p>
|
|
26
|
+
|
|
27
|
+
<p align="center">
|
|
28
|
+
Detect when your docs, code, configs, and examples stop agreeing with each other.
|
|
29
|
+
</p>
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
Modern repositories are read by more than humans.
|
|
34
|
+
|
|
35
|
+
They are read by teammates, new contributors, users, reviewers, downstream integrators — and increasingly by AI agents.
|
|
36
|
+
|
|
37
|
+
That only works if the repository can be trusted.
|
|
38
|
+
|
|
39
|
+
But trust quietly erodes over time:
|
|
40
|
+
|
|
41
|
+
- documentation describes features that were never shipped
|
|
42
|
+
- code behavior drifts away from the spec
|
|
43
|
+
- examples stop matching reality
|
|
44
|
+
- values claimed to be configurable are hardcoded deep in the codebase
|
|
45
|
+
- papers and implementation tell different stories
|
|
46
|
+
|
|
47
|
+
**LyingDocs is a trust layer for your repository.**
|
|
48
|
+
It audits the gap between what your repo *says* and what your code *actually does* — before your users, contributors, or agents learn the wrong thing.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Why LyingDocs exists
|
|
53
|
+
|
|
54
|
+
Every codebase accumulates invisible trust debt.
|
|
55
|
+
|
|
56
|
+
In the age of fast iteration and LLM-assisted development, teams now ship code and documentation faster than ever — but not always in sync. A repo may still look polished while becoming progressively less reliable as a source of truth.
|
|
57
|
+
|
|
58
|
+
That is the problem LyingDocs is built to solve.
|
|
59
|
+
|
|
60
|
+
LyingDocs is not just a documentation checker. It is a system for surfacing **repository misalignment**:
|
|
61
|
+
|
|
62
|
+
- docs that overclaim
|
|
63
|
+
- code paths that are undocumented
|
|
64
|
+
- specs that no longer match implementation
|
|
65
|
+
- "configurable" behavior that is actually fixed
|
|
66
|
+
- claims in papers or READMEs that cannot be supported by the code
|
|
67
|
+
|
|
68
|
+
The goal is simple:
|
|
69
|
+
|
|
70
|
+
> Keep your repository trustworthy for humans and machines.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## What LyingDocs does
|
|
75
|
+
|
|
76
|
+
LyingDocs deploys two autonomous agents against your repository:
|
|
77
|
+
|
|
78
|
+
- **Hermes** reads your documentation, plans an audit strategy, and decides what needs to be verified
|
|
79
|
+
- **Argus** investigates the actual codebase and reports what the code really does
|
|
80
|
+
|
|
81
|
+
Hermes then reconciles the two and writes a structured report of the mismatches it finds.
|
|
82
|
+
|
|
83
|
+
This lets you catch cases where your repository is no longer telling the truth about itself.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## How it works
|
|
88
|
+
|
|
89
|
+
### 1. Hermes reads what the repo claims
|
|
90
|
+
|
|
91
|
+
Hermes traverses your documentation and extracts claims, assumptions, and implementation promises from sources such as:
|
|
92
|
+
|
|
93
|
+
- docs/
|
|
94
|
+
- README files
|
|
95
|
+
- setup guides
|
|
96
|
+
- usage examples
|
|
97
|
+
- configuration references
|
|
98
|
+
- papers and research writeups
|
|
99
|
+
|
|
100
|
+
It then plans an audit by turning those claims into targeted investigation tasks.
|
|
101
|
+
|
|
102
|
+
### 2. Argus checks what the code actually does
|
|
103
|
+
|
|
104
|
+
Argus executes each task against your real codebase.
|
|
105
|
+
|
|
106
|
+
You can choose the backend that best fits your setup:
|
|
107
|
+
|
|
108
|
+
- **`codex`** — [OpenAI Codex CLI](https://github.com/openai/codex) subprocess
|
|
109
|
+
- **`claude_code`** — [Claude Code](https://docs.anthropic.com/claude/docs/claude-code) CLI subprocess (`claude -p`)
|
|
110
|
+
- **`local`** — built-in minimal agent loop using filesystem tools and any OpenAI-compatible API directly
|
|
111
|
+
|
|
112
|
+
### 3. LyingDocs reports the trust gaps
|
|
113
|
+
|
|
114
|
+
Hermes reconciles documented claims with observed implementation behavior and outputs a report of misalignments.
|
|
115
|
+
|
|
116
|
+
These findings can then be reviewed by maintainers, turned into issues, and eventually enforced in CI.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Positioning
|
|
121
|
+
|
|
122
|
+
LyingDocs is best thought of as:
|
|
123
|
+
|
|
124
|
+
- a **trust layer** for your repo
|
|
125
|
+
- a **docs-to-code alignment guard**
|
|
126
|
+
- a **pre-user warning system** for misleading documentation
|
|
127
|
+
- a **CI / GitHub Action quality gate** for repository truthfulness
|
|
128
|
+
|
|
129
|
+
It is not meant to be a tool you manually open every day.
|
|
130
|
+
|
|
131
|
+
It is meant to become something your repository runs automatically:
|
|
132
|
+
|
|
133
|
+
- on pull requests
|
|
134
|
+
- before releases
|
|
135
|
+
- during scheduled audits
|
|
136
|
+
- before docs deployment
|
|
137
|
+
- as part of your GitHub Actions workflow
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Installation
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
pip install lyingdocs
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Quick Start
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
export OPENAI_API_KEY="sk-..."
|
|
153
|
+
|
|
154
|
+
lyingdocs analyze --doc-path docs/ --code-path . -o output/audit
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
This performs a full audit of your repository and produces a report describing where documentation and implementation no longer align.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Documentation
|
|
162
|
+
|
|
163
|
+
| | |
|
|
164
|
+
| --- | --- |
|
|
165
|
+
| [Configuration](docs/configuration.md) | Config file schema, environment variables, layer resolution |
|
|
166
|
+
| [Argus Backends](docs/backends.md) | Setup for `local`, `codex`, and `claude_code` |
|
|
167
|
+
| [CLI Reference](docs/cli.md) | All flags, commands, and output artifacts |
|
|
168
|
+
| [GitHub Actions](docs/guides/github-actions.md) | CI integration, authentication, triggers, and approval gates |
|
|
169
|
+
| [GitHub Issues](docs/guides/github-issues.md) | Using `--gen-issue` to draft and post issues |
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Example use cases
|
|
174
|
+
|
|
175
|
+
Use LyingDocs when you want to answer questions like:
|
|
176
|
+
|
|
177
|
+
* Does the README still reflect the real behavior of the project?
|
|
178
|
+
* Are our examples and quickstarts still valid?
|
|
179
|
+
* Did code change without the docs changing with it?
|
|
180
|
+
* Are we claiming configuration that does not actually exist?
|
|
181
|
+
* Does our paper describe behavior the implementation does not support?
|
|
182
|
+
* Can an AI agent trust this repository as a source of truth?
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Misalignment categories
|
|
187
|
+
|
|
188
|
+
| Category | Description |
|
|
189
|
+
| ------------------ | ----------------------------------------------------- |
|
|
190
|
+
| **LogicMismatch** | Code contradicts documentation |
|
|
191
|
+
| **PhantomSpec** | Documentation describes non-existent features |
|
|
192
|
+
| **ShadowLogic** | Important code behavior exists but is undocumented |
|
|
193
|
+
| **HardcodedDrift** | Supposedly configurable values are actually hardcoded |
|
|
194
|
+
|
|
195
|
+
These categories represent different ways repository trust breaks down.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## GitHub Actions
|
|
200
|
+
|
|
201
|
+
LyingDocs runs natively in GitHub Actions as a trust gate for your CI pipeline.
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
pip install lyingdocs
|
|
205
|
+
lyingdocs init-ci --doc-path docs/ --backend claude_code --claude-oauth --trigger tag,manual
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
This generates a workflow that:
|
|
209
|
+
|
|
210
|
+
* audits docs-vs-code alignment on every tag push (or on demand)
|
|
211
|
+
* posts findings as a PR comment
|
|
212
|
+
* optionally requires manual approval before merging
|
|
213
|
+
|
|
214
|
+
Supports all three backends (`local`, `codex`, `claude_code`) and both API key and OAuth token authentication for Claude Code.
|
|
215
|
+
|
|
216
|
+
See the [full setup guide](docs/guides/github-actions.md) for trigger options, approval gates, and backend configuration.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Roadmap
|
|
221
|
+
|
|
222
|
+
* [x] **Multi-harness support** — Argus runs on Codex, Claude Code, or a built-in local agent
|
|
223
|
+
* [x] **Issue generation** — `--gen-issue` drafts GitHub issues from findings
|
|
224
|
+
* [x] **GitHub Action integration** — `lyingdocs init-ci` generates a ready-to-use workflow with configurable triggers, backend selection, PR comments, and manual approval gates
|
|
225
|
+
* [ ] **One-session memory support** — Argus backends retain state across tasks for deeper multi-step investigations
|
|
226
|
+
* [ ] **Deeper analysis** — multi-hop reasoning across doc hierarchies and version-aware diffing to detect when code changed but docs did not
|
|
227
|
+
* [ ] **Paper mode** — treat academic papers as documentation and detect paper-to-code misalignment
|
|
228
|
+
* [ ] **Auto-fix mode** — Hermes proposes doc patches for human review and application
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## For researchers
|
|
233
|
+
|
|
234
|
+
A paper is also documentation.
|
|
235
|
+
|
|
236
|
+
It is a human-language description of code, behavior, claims, and expected results — often written under deadline, and often drifting away from the implementation over time.
|
|
237
|
+
|
|
238
|
+
If you want to know whether:
|
|
239
|
+
|
|
240
|
+
* your repo matches your paper
|
|
241
|
+
* your claims are supported by the code
|
|
242
|
+
* another researcher can trust your implementation
|
|
243
|
+
|
|
244
|
+
then LyingDocs can help.
|
|
245
|
+
|
|
246
|
+
The problem is the same.
|
|
247
|
+
A paper is documentation for code.
|
|
248
|
+
LyingDocs is for papers too.
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Why “trust layer”
|
|
253
|
+
|
|
254
|
+
Because the problem is bigger than stale docs.
|
|
255
|
+
|
|
256
|
+
A repository becomes untrustworthy whenever its outward description and inward behavior drift apart.
|
|
257
|
+
|
|
258
|
+
That harms:
|
|
259
|
+
|
|
260
|
+
* users trying to adopt the project
|
|
261
|
+
* contributors trying to extend it
|
|
262
|
+
* maintainers trying to review changes
|
|
263
|
+
* researchers trying to reproduce results
|
|
264
|
+
* AI agents trying to understand the repo
|
|
265
|
+
|
|
266
|
+
LyingDocs exists to make that gap visible.
|
|
267
|
+
|
|
268
|
+
Not after users complain.
|
|
269
|
+
Before.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/logo.png" alt="LyingDocs" width="200" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">LyingDocs</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
A trust layer for your repository.
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
Detect when your docs, code, configs, and examples stop agreeing with each other.
|
|
13
|
+
</p>
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
Modern repositories are read by more than humans.
|
|
18
|
+
|
|
19
|
+
They are read by teammates, new contributors, users, reviewers, downstream integrators — and increasingly by AI agents.
|
|
20
|
+
|
|
21
|
+
That only works if the repository can be trusted.
|
|
22
|
+
|
|
23
|
+
But trust quietly erodes over time:
|
|
24
|
+
|
|
25
|
+
- documentation describes features that were never shipped
|
|
26
|
+
- code behavior drifts away from the spec
|
|
27
|
+
- examples stop matching reality
|
|
28
|
+
- values claimed to be configurable are hardcoded deep in the codebase
|
|
29
|
+
- papers and implementation tell different stories
|
|
30
|
+
|
|
31
|
+
**LyingDocs is a trust layer for your repository.**
|
|
32
|
+
It audits the gap between what your repo *says* and what your code *actually does* — before your users, contributors, or agents learn the wrong thing.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Why LyingDocs exists
|
|
37
|
+
|
|
38
|
+
Every codebase accumulates invisible trust debt.
|
|
39
|
+
|
|
40
|
+
In the age of fast iteration and LLM-assisted development, teams now ship code and documentation faster than ever — but not always in sync. A repo may still look polished while becoming progressively less reliable as a source of truth.
|
|
41
|
+
|
|
42
|
+
That is the problem LyingDocs is built to solve.
|
|
43
|
+
|
|
44
|
+
LyingDocs is not just a documentation checker. It is a system for surfacing **repository misalignment**:
|
|
45
|
+
|
|
46
|
+
- docs that overclaim
|
|
47
|
+
- code paths that are undocumented
|
|
48
|
+
- specs that no longer match implementation
|
|
49
|
+
- "configurable" behavior that is actually fixed
|
|
50
|
+
- claims in papers or READMEs that cannot be supported by the code
|
|
51
|
+
|
|
52
|
+
The goal is simple:
|
|
53
|
+
|
|
54
|
+
> Keep your repository trustworthy for humans and machines.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## What LyingDocs does
|
|
59
|
+
|
|
60
|
+
LyingDocs deploys two autonomous agents against your repository:
|
|
61
|
+
|
|
62
|
+
- **Hermes** reads your documentation, plans an audit strategy, and decides what needs to be verified
|
|
63
|
+
- **Argus** investigates the actual codebase and reports what the code really does
|
|
64
|
+
|
|
65
|
+
Hermes then reconciles the two and writes a structured report of the mismatches it finds.
|
|
66
|
+
|
|
67
|
+
This lets you catch cases where your repository is no longer telling the truth about itself.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## How it works
|
|
72
|
+
|
|
73
|
+
### 1. Hermes reads what the repo claims
|
|
74
|
+
|
|
75
|
+
Hermes traverses your documentation and extracts claims, assumptions, and implementation promises from sources such as:
|
|
76
|
+
|
|
77
|
+
- docs/
|
|
78
|
+
- README files
|
|
79
|
+
- setup guides
|
|
80
|
+
- usage examples
|
|
81
|
+
- configuration references
|
|
82
|
+
- papers and research writeups
|
|
83
|
+
|
|
84
|
+
It then plans an audit by turning those claims into targeted investigation tasks.
|
|
85
|
+
|
|
86
|
+
### 2. Argus checks what the code actually does
|
|
87
|
+
|
|
88
|
+
Argus executes each task against your real codebase.
|
|
89
|
+
|
|
90
|
+
You can choose the backend that best fits your setup:
|
|
91
|
+
|
|
92
|
+
- **`codex`** — [OpenAI Codex CLI](https://github.com/openai/codex) subprocess
|
|
93
|
+
- **`claude_code`** — [Claude Code](https://docs.anthropic.com/claude/docs/claude-code) CLI subprocess (`claude -p`)
|
|
94
|
+
- **`local`** — built-in minimal agent loop using filesystem tools and any OpenAI-compatible API directly
|
|
95
|
+
|
|
96
|
+
### 3. LyingDocs reports the trust gaps
|
|
97
|
+
|
|
98
|
+
Hermes reconciles documented claims with observed implementation behavior and outputs a report of misalignments.
|
|
99
|
+
|
|
100
|
+
These findings can then be reviewed by maintainers, turned into issues, and eventually enforced in CI.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Positioning
|
|
105
|
+
|
|
106
|
+
LyingDocs is best thought of as:
|
|
107
|
+
|
|
108
|
+
- a **trust layer** for your repo
|
|
109
|
+
- a **docs-to-code alignment guard**
|
|
110
|
+
- a **pre-user warning system** for misleading documentation
|
|
111
|
+
- a **CI / GitHub Action quality gate** for repository truthfulness
|
|
112
|
+
|
|
113
|
+
It is not meant to be a tool you manually open every day.
|
|
114
|
+
|
|
115
|
+
It is meant to become something your repository runs automatically:
|
|
116
|
+
|
|
117
|
+
- on pull requests
|
|
118
|
+
- before releases
|
|
119
|
+
- during scheduled audits
|
|
120
|
+
- before docs deployment
|
|
121
|
+
- as part of your GitHub Actions workflow
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Installation
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
pip install lyingdocs
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Quick Start
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
export OPENAI_API_KEY="sk-..."
|
|
137
|
+
|
|
138
|
+
lyingdocs analyze --doc-path docs/ --code-path . -o output/audit
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
This performs a full audit of your repository and produces a report describing where documentation and implementation no longer align.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Documentation
|
|
146
|
+
|
|
147
|
+
| | |
|
|
148
|
+
| --- | --- |
|
|
149
|
+
| [Configuration](docs/configuration.md) | Config file schema, environment variables, layer resolution |
|
|
150
|
+
| [Argus Backends](docs/backends.md) | Setup for `local`, `codex`, and `claude_code` |
|
|
151
|
+
| [CLI Reference](docs/cli.md) | All flags, commands, and output artifacts |
|
|
152
|
+
| [GitHub Actions](docs/guides/github-actions.md) | CI integration, authentication, triggers, and approval gates |
|
|
153
|
+
| [GitHub Issues](docs/guides/github-issues.md) | Using `--gen-issue` to draft and post issues |
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Example use cases
|
|
158
|
+
|
|
159
|
+
Use LyingDocs when you want to answer questions like:
|
|
160
|
+
|
|
161
|
+
* Does the README still reflect the real behavior of the project?
|
|
162
|
+
* Are our examples and quickstarts still valid?
|
|
163
|
+
* Did code change without the docs changing with it?
|
|
164
|
+
* Are we claiming configuration that does not actually exist?
|
|
165
|
+
* Does our paper describe behavior the implementation does not support?
|
|
166
|
+
* Can an AI agent trust this repository as a source of truth?
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Misalignment categories
|
|
171
|
+
|
|
172
|
+
| Category | Description |
|
|
173
|
+
| ------------------ | ----------------------------------------------------- |
|
|
174
|
+
| **LogicMismatch** | Code contradicts documentation |
|
|
175
|
+
| **PhantomSpec** | Documentation describes non-existent features |
|
|
176
|
+
| **ShadowLogic** | Important code behavior exists but is undocumented |
|
|
177
|
+
| **HardcodedDrift** | Supposedly configurable values are actually hardcoded |
|
|
178
|
+
|
|
179
|
+
These categories represent different ways repository trust breaks down.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## GitHub Actions
|
|
184
|
+
|
|
185
|
+
LyingDocs runs natively in GitHub Actions as a trust gate for your CI pipeline.
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
pip install lyingdocs
|
|
189
|
+
lyingdocs init-ci --doc-path docs/ --backend claude_code --claude-oauth --trigger tag,manual
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
This generates a workflow that:
|
|
193
|
+
|
|
194
|
+
* audits docs-vs-code alignment on every tag push (or on demand)
|
|
195
|
+
* posts findings as a PR comment
|
|
196
|
+
* optionally requires manual approval before merging
|
|
197
|
+
|
|
198
|
+
Supports all three backends (`local`, `codex`, `claude_code`) and both API key and OAuth token authentication for Claude Code.
|
|
199
|
+
|
|
200
|
+
See the [full setup guide](docs/guides/github-actions.md) for trigger options, approval gates, and backend configuration.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Roadmap
|
|
205
|
+
|
|
206
|
+
* [x] **Multi-harness support** — Argus runs on Codex, Claude Code, or a built-in local agent
|
|
207
|
+
* [x] **Issue generation** — `--gen-issue` drafts GitHub issues from findings
|
|
208
|
+
* [x] **GitHub Action integration** — `lyingdocs init-ci` generates a ready-to-use workflow with configurable triggers, backend selection, PR comments, and manual approval gates
|
|
209
|
+
* [ ] **One-session memory support** — Argus backends retain state across tasks for deeper multi-step investigations
|
|
210
|
+
* [ ] **Deeper analysis** — multi-hop reasoning across doc hierarchies and version-aware diffing to detect when code changed but docs did not
|
|
211
|
+
* [ ] **Paper mode** — treat academic papers as documentation and detect paper-to-code misalignment
|
|
212
|
+
* [ ] **Auto-fix mode** — Hermes proposes doc patches for human review and application
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## For researchers
|
|
217
|
+
|
|
218
|
+
A paper is also documentation.
|
|
219
|
+
|
|
220
|
+
It is a human-language description of code, behavior, claims, and expected results — often written under deadline, and often drifting away from the implementation over time.
|
|
221
|
+
|
|
222
|
+
If you want to know whether:
|
|
223
|
+
|
|
224
|
+
* your repo matches your paper
|
|
225
|
+
* your claims are supported by the code
|
|
226
|
+
* another researcher can trust your implementation
|
|
227
|
+
|
|
228
|
+
then LyingDocs can help.
|
|
229
|
+
|
|
230
|
+
The problem is the same.
|
|
231
|
+
A paper is documentation for code.
|
|
232
|
+
LyingDocs is for papers too.
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## Why “trust layer”
|
|
237
|
+
|
|
238
|
+
Because the problem is bigger than stale docs.
|
|
239
|
+
|
|
240
|
+
A repository becomes untrustworthy whenever its outward description and inward behavior drift apart.
|
|
241
|
+
|
|
242
|
+
That harms:
|
|
243
|
+
|
|
244
|
+
* users trying to adopt the project
|
|
245
|
+
* contributors trying to extend it
|
|
246
|
+
* maintainers trying to review changes
|
|
247
|
+
* researchers trying to reproduce results
|
|
248
|
+
* AI agents trying to understand the repo
|
|
249
|
+
|
|
250
|
+
LyingDocs exists to make that gap visible.
|
|
251
|
+
|
|
252
|
+
Not after users complain.
|
|
253
|
+
Before.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## License
|
|
258
|
+
|
|
259
|
+
MIT
|
|
@@ -16,14 +16,9 @@ class ArgusDispatcher:
|
|
|
16
16
|
def __init__(self, config: dict):
|
|
17
17
|
self.config = config
|
|
18
18
|
self.backend = config["argus_backend"]
|
|
19
|
-
self._resolved = False
|
|
20
19
|
self._codex_bin: str | None = None
|
|
21
20
|
self._claude_bin: str | None = None
|
|
22
21
|
|
|
23
|
-
def _resolve_once(self) -> None:
|
|
24
|
-
if self._resolved:
|
|
25
|
-
return
|
|
26
|
-
self._resolved = True
|
|
27
22
|
if self.backend == "codex":
|
|
28
23
|
self._codex_bin = find_codex_binary(self.config)
|
|
29
24
|
if self._codex_bin:
|
|
@@ -56,8 +51,6 @@ class ArgusDispatcher:
|
|
|
56
51
|
task_id: str,
|
|
57
52
|
focus_paths: list[str] | None = None,
|
|
58
53
|
) -> str:
|
|
59
|
-
self._resolve_once()
|
|
60
|
-
|
|
61
54
|
if self.backend == "codex":
|
|
62
55
|
return run_codex_task(
|
|
63
56
|
self.config,
|
|
@@ -49,12 +49,24 @@ def run_claude_code_task(
|
|
|
49
49
|
focus_section = f"\nPriority files/directories:\n{paths_str}\n"
|
|
50
50
|
|
|
51
51
|
full_prompt = (
|
|
52
|
-
f"You are Argus, a code analyst
|
|
52
|
+
f"You are Argus, a code analyst. Your job is to verify a specific claim "
|
|
53
|
+
f"about a codebase by reading the actual source files.\n\n"
|
|
53
54
|
f"Task: {task_description}\n"
|
|
54
55
|
f"{focus_section}\n"
|
|
55
|
-
f"
|
|
56
|
-
f"
|
|
57
|
-
f"
|
|
56
|
+
f"Code root: {code_path.resolve()}\n\n"
|
|
57
|
+
f"Instructions:\n"
|
|
58
|
+
f"1. Explore the relevant code areas and READ the files that bear on the claim in full — "
|
|
59
|
+
f"do not rely on search snippets or filenames alone.\n"
|
|
60
|
+
f"2. Pay special attention to: mathematical formulas and equations (verify "
|
|
61
|
+
f"operator-by-operator), numerical constants and thresholds, algorithm control flow, "
|
|
62
|
+
f"and configuration key names with their defaults.\n"
|
|
63
|
+
f"3. Ground every conclusion in concrete file:line references with short quoted snippets.\n\n"
|
|
64
|
+
f"Return a report with:\n"
|
|
65
|
+
f"- A one-line verdict: confirmed / contradicted / not-found / partial\n"
|
|
66
|
+
f"- Specific file:line references for every claim\n"
|
|
67
|
+
f"- Short quoted code snippets as evidence\n"
|
|
68
|
+
f"- Any caveats or ambiguities\n\n"
|
|
69
|
+
f"Do NOT speculate. Only report what the code actually shows."
|
|
58
70
|
)
|
|
59
71
|
|
|
60
72
|
output_file = output_dir / f"argus_task_{task_id}.txt"
|
|
@@ -9,7 +9,7 @@ import logging
|
|
|
9
9
|
import re
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
|
|
12
|
-
from .llm import call_llm_with_tools, make_client
|
|
12
|
+
from .llm import LLMResponse, call_llm_with_tools, make_client
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger("lyingdocs")
|
|
15
15
|
|
|
@@ -126,9 +126,11 @@ class LocalArgus:
|
|
|
126
126
|
def __init__(self, config: dict, code_path: Path):
|
|
127
127
|
self.config = config
|
|
128
128
|
self.code_root = code_path.resolve()
|
|
129
|
+
self.provider = config.get("argus_provider", "openai")
|
|
129
130
|
self.client = make_client(
|
|
130
131
|
api_key=config["argus_api_key"],
|
|
131
132
|
base_url=config["argus_base_url"],
|
|
133
|
+
provider=self.provider,
|
|
132
134
|
)
|
|
133
135
|
self.model = config["argus_model"]
|
|
134
136
|
self.max_iterations = int(config.get("argus_local_max_iterations", 25))
|
|
@@ -149,7 +151,8 @@ class LocalArgus:
|
|
|
149
151
|
for iteration in range(1, self.max_iterations + 1):
|
|
150
152
|
logger.info(" Argus(local) iter %d/%d", iteration, self.max_iterations)
|
|
151
153
|
response = call_llm_with_tools(
|
|
152
|
-
self.client, self.model, messages, ARGUS_LOCAL_TOOL_SCHEMAS
|
|
154
|
+
self.client, self.model, messages, ARGUS_LOCAL_TOOL_SCHEMAS,
|
|
155
|
+
provider=self.provider,
|
|
153
156
|
)
|
|
154
157
|
messages.append(self._response_to_message(response))
|
|
155
158
|
|