linksanity 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. linksanity-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +43 -0
  2. linksanity-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +24 -0
  3. linksanity-0.1.0/.github/dependabot.yml +13 -0
  4. linksanity-0.1.0/.github/workflows/ci.yml +58 -0
  5. linksanity-0.1.0/.gitignore +31 -0
  6. linksanity-0.1.0/CONTRIBUTING.md +52 -0
  7. linksanity-0.1.0/PKG-INFO +436 -0
  8. linksanity-0.1.0/README.md +392 -0
  9. linksanity-0.1.0/linksanity/__init__.py +3 -0
  10. linksanity-0.1.0/linksanity/__main__.py +3 -0
  11. linksanity-0.1.0/linksanity/checkers/__init__.py +0 -0
  12. linksanity-0.1.0/linksanity/checkers/filesystem.py +136 -0
  13. linksanity-0.1.0/linksanity/checkers/http.py +171 -0
  14. linksanity-0.1.0/linksanity/checkers/playwright.py +228 -0
  15. linksanity-0.1.0/linksanity/cli.py +254 -0
  16. linksanity-0.1.0/linksanity/config.py +104 -0
  17. linksanity-0.1.0/linksanity/crawler.py +125 -0
  18. linksanity-0.1.0/linksanity/parsers/__init__.py +0 -0
  19. linksanity-0.1.0/linksanity/parsers/html.py +42 -0
  20. linksanity-0.1.0/linksanity/parsers/markdown.py +48 -0
  21. linksanity-0.1.0/linksanity/parsers/rst.py +53 -0
  22. linksanity-0.1.0/linksanity/py.typed +0 -0
  23. linksanity-0.1.0/linksanity/queue.py +72 -0
  24. linksanity-0.1.0/linksanity/reporters/__init__.py +26 -0
  25. linksanity-0.1.0/linksanity/reporters/console.py +78 -0
  26. linksanity-0.1.0/linksanity/reporters/csv_reporter.py +39 -0
  27. linksanity-0.1.0/linksanity/reporters/github_reporter.py +108 -0
  28. linksanity-0.1.0/linksanity/reporters/json_reporter.py +28 -0
  29. linksanity-0.1.0/linksanity/reporters/markdown_reporter.py +68 -0
  30. linksanity-0.1.0/linksanity/router.py +72 -0
  31. linksanity-0.1.0/linksanity/scanner.py +77 -0
  32. linksanity-0.1.0/pyproject.toml +95 -0
  33. linksanity-0.1.0/tests/__init__.py +0 -0
  34. linksanity-0.1.0/tests/fixtures/docs/broken.md +7 -0
  35. linksanity-0.1.0/tests/fixtures/docs/external.md +5 -0
  36. linksanity-0.1.0/tests/fixtures/docs/guide.md +5 -0
  37. linksanity-0.1.0/tests/fixtures/docs/index.md +9 -0
  38. linksanity-0.1.0/tests/fixtures/linksanity.toml +6 -0
  39. linksanity-0.1.0/tests/fixtures/sample.html +16 -0
  40. linksanity-0.1.0/tests/fixtures/sample.md +24 -0
  41. linksanity-0.1.0/tests/fixtures/sample.rst +22 -0
  42. linksanity-0.1.0/tests/fixtures/site/index.html +10 -0
  43. linksanity-0.1.0/tests/fixtures/site/page2.html +9 -0
  44. linksanity-0.1.0/tests/integration/__init__.py +0 -0
  45. linksanity-0.1.0/tests/integration/test_crawl_e2e.py +105 -0
  46. linksanity-0.1.0/tests/integration/test_playwright.py +105 -0
  47. linksanity-0.1.0/tests/integration/test_scan_e2e.py +227 -0
  48. linksanity-0.1.0/tests/unit/__init__.py +0 -0
  49. linksanity-0.1.0/tests/unit/test_checkers/__init__.py +0 -0
  50. linksanity-0.1.0/tests/unit/test_checkers/test_filesystem.py +212 -0
  51. linksanity-0.1.0/tests/unit/test_checkers/test_http.py +166 -0
  52. linksanity-0.1.0/tests/unit/test_config.py +117 -0
  53. linksanity-0.1.0/tests/unit/test_crawler.py +202 -0
  54. linksanity-0.1.0/tests/unit/test_parsers/__init__.py +0 -0
  55. linksanity-0.1.0/tests/unit/test_parsers/test_html.py +84 -0
  56. linksanity-0.1.0/tests/unit/test_parsers/test_markdown.py +92 -0
  57. linksanity-0.1.0/tests/unit/test_parsers/test_rst.py +69 -0
  58. linksanity-0.1.0/tests/unit/test_queue.py +90 -0
  59. linksanity-0.1.0/tests/unit/test_reporters/__init__.py +0 -0
  60. linksanity-0.1.0/tests/unit/test_reporters/test_console.py +180 -0
  61. linksanity-0.1.0/tests/unit/test_reporters/test_github.py +184 -0
  62. linksanity-0.1.0/tests/unit/test_reporters/test_json_csv.py +148 -0
  63. linksanity-0.1.0/tests/unit/test_reporters/test_markdown.py +130 -0
  64. linksanity-0.1.0/tests/unit/test_router.py +206 -0
@@ -0,0 +1,43 @@
1
+ name: Bug report
2
+ description: Report a bug in linksanity
3
+ labels: [bug]
4
+ body:
5
+ - type: input
6
+ id: version
7
+ attributes:
8
+ label: linksanity version
9
+ placeholder: "e.g. 0.1.0 (run `linksanity --version`)"
10
+ validations:
11
+ required: true
12
+
13
+ - type: input
14
+ id: python
15
+ attributes:
16
+ label: Python version
17
+ placeholder: "e.g. 3.12.1"
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: command
23
+ attributes:
24
+ label: Command
25
+ description: The exact command you ran
26
+ placeholder: linksanity scan ./docs/ --check-anchors
27
+ validations:
28
+ required: true
29
+
30
+ - type: textarea
31
+ id: expected
32
+ attributes:
33
+ label: Expected behavior
34
+ validations:
35
+ required: true
36
+
37
+ - type: textarea
38
+ id: actual
39
+ attributes:
40
+ label: Actual behavior
41
+ description: Paste the full output or error message
42
+ validations:
43
+ required: true
@@ -0,0 +1,24 @@
1
+ name: Feature request
2
+ description: Suggest an improvement or new feature
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: Problem
9
+ description: What problem does this solve?
10
+ validations:
11
+ required: true
12
+
13
+ - type: textarea
14
+ id: solution
15
+ attributes:
16
+ label: Proposed solution
17
+ validations:
18
+ required: true
19
+
20
+ - type: textarea
21
+ id: alternatives
22
+ attributes:
23
+ label: Alternatives considered
24
+ description: Other approaches you thought about
@@ -0,0 +1,13 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ open-pull-requests-limit: 5
8
+
9
+ - package-ecosystem: "github-actions"
10
+ directory: "/"
11
+ schedule:
12
+ interval: "weekly"
13
+ open-pull-requests-limit: 5
@@ -0,0 +1,58 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ matrix:
16
+ python-version: ["3.11", "3.12"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
20
+
21
+ - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: pip install -e ".[dev]"
27
+
28
+ - name: Lint
29
+ run: ruff check linksanity/ tests/
30
+
31
+ - name: Type check
32
+ run: mypy linksanity/
33
+
34
+ - name: Test (unit only, no browser)
35
+ run: pytest tests/unit/ -x -q
36
+
37
+ test-browser:
38
+ runs-on: ubuntu-latest
39
+ steps:
40
+ - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
41
+
42
+ - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
43
+ with:
44
+ python-version: "3.12"
45
+
46
+ - name: Install dependencies
47
+ run: pip install -e ".[dev,browser]"
48
+
49
+ - name: Install Playwright browsers
50
+ run: playwright install --with-deps chromium
51
+
52
+ - name: Test (all including browser)
53
+ run: pytest -x -q --cov=linksanity --cov-report=xml
54
+
55
+ - name: Upload coverage
56
+ uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
57
+ with:
58
+ file: coverage.xml
@@ -0,0 +1,31 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ *.egg-info/
7
+ *.egg
8
+ .eggs/
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # Build
16
+ dist/
17
+ build/
18
+ *.whl
19
+
20
+ # Testing & coverage
21
+ .coverage
22
+ .coverage.*
23
+ htmlcov/
24
+ .pytest_cache/
25
+ .mypy_cache/
26
+
27
+ # IDE
28
+ .vscode/
29
+ .idea/
30
+ *.swp
31
+ .DS_Store
@@ -0,0 +1,52 @@
1
+ # Contributing to linksanity
2
+
3
+ Thanks for your interest in contributing!
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/ya8282/linksanity
9
+ cd linksanity
10
+ python -m venv .venv && source .venv/bin/activate
11
+ pip install -e ".[dev,browser]"
12
+ playwright install chromium
13
+ ```
14
+
15
+ ## Running tests
16
+
17
+ ```bash
18
+ pytest # all tests
19
+ pytest tests/unit/ # unit tests only (no browser)
20
+ pytest tests/integration/ # integration tests (browser optional)
21
+ ```
22
+
23
+ ## Code quality
24
+
25
+ ```bash
26
+ ruff check linksanity/ tests/ --fix # lint + auto-fix
27
+ mypy linksanity/ # type check (strict mode)
28
+ ```
29
+
30
+ Both must pass before opening a PR.
31
+
32
+ ## Guidelines
33
+
34
+ - Follow the existing code style (ruff-enforced)
35
+ - New features need unit tests; new checkers/parsers need integration tests
36
+ - All public functions must have type annotations
37
+ - `GITHUB_TOKEN` must never be accepted as a CLI argument — env only
38
+ - Never write to disk unless `--output`, `--report`, or `--github-issue` is passed
39
+
40
+ ## Pull requests
41
+
42
+ 1. Fork and create a branch from `main`
43
+ 2. Write tests for your change
44
+ 3. Run `pytest`, `ruff check`, and `mypy` — all must pass
45
+ 4. Open a PR with a short description of what changed and why
46
+
47
+ ## Reporting bugs
48
+
49
+ Open an issue at https://github.com/ya8282/linksanity/issues with:
50
+ - Python version
51
+ - Command you ran
52
+ - Expected vs. actual output
@@ -0,0 +1,436 @@
1
+ Metadata-Version: 2.4
2
+ Name: linksanity
3
+ Version: 0.1.0
4
+ Summary: Detect broken links in Markdown, reStructuredText, and HTML documentation
5
+ Project-URL: Homepage, https://github.com/ya8282/linksanity
6
+ Project-URL: Bug Tracker, https://github.com/ya8282/linksanity/issues
7
+ Author: linksanity contributors
8
+ License: MIT
9
+ Keywords: broken-links,documentation,html,link-checker,markdown,rst
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Documentation
21
+ Classifier: Topic :: Software Development :: Testing
22
+ Classifier: Topic :: Utilities
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.11
25
+ Requires-Dist: beautifulsoup4>=4.12
26
+ Requires-Dist: docutils>=0.20
27
+ Requires-Dist: httpx[http2]>=0.27
28
+ Requires-Dist: lxml>=5.0
29
+ Requires-Dist: markdown-it-py>=3.0
30
+ Requires-Dist: rich>=13.0
31
+ Requires-Dist: typer>=0.12
32
+ Provides-Extra: browser
33
+ Requires-Dist: playwright>=1.40; extra == 'browser'
34
+ Provides-Extra: dev
35
+ Requires-Dist: mypy>=1.10; extra == 'dev'
36
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
38
+ Requires-Dist: pytest>=8.0; extra == 'dev'
39
+ Requires-Dist: respx>=0.21; extra == 'dev'
40
+ Requires-Dist: ruff>=0.4; extra == 'dev'
41
+ Requires-Dist: types-beautifulsoup4; extra == 'dev'
42
+ Requires-Dist: types-docutils; extra == 'dev'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # linksanity
46
+
47
+ Detect broken links and redirects in Markdown, reStructuredText, and HTML documentation.
48
+
49
+ ```
50
+ $ linksanity scan ./docs/
51
+ docs/api/guide.md
52
+ BROKEN line 12 ./missing.md — file not found
53
+ REDIRECT line 45 https://old.example.com → https://new.example.com
54
+
55
+ ok=38 broken=1 redirect=1 skipped=0
56
+ ```
57
+
58
+ ## Features
59
+
60
+ - **Static scan** — parse `.md`, `.rst`, and `.html` source files without a browser
61
+ - **Live crawl** — follow links on a deployed site using a headless browser (Playwright)
62
+ - **Exit codes** — `0` = clean, `1` = broken links found (ideal for CI)
63
+ - **Multiple formats** — console (Rich), JSON, CSV; optional Markdown summary report
64
+ - **Anchor validation** — opt-in `--check-anchors` flag
65
+ - **GitHub Issues** — create or update an issue summarising broken links
66
+ - **Ignore domains** — skip domains you don't control
67
+ - **JS-rendered pages** — route specific domains through Playwright in scan mode
68
+ - **Retry logic** — exponential back-off on 429/503; HEAD→GET fallback on 405
69
+
70
+ ## Install
71
+
72
+ **From PyPI:**
73
+
74
+ ```bash
75
+ pip install linksanity
76
+
77
+ # Optional: browser support for JS-rendered pages
78
+ pip install "linksanity[browser]"
79
+ playwright install chromium
80
+ ```
81
+
82
+ **From source:**
83
+
84
+ ```bash
85
+ git clone https://github.com/ya8282/linksanity
86
+ cd linksanity
87
+ pip install -e ".[dev,browser]"
88
+ playwright install chromium
89
+ ```
90
+
91
+ Requires Python 3.11+.
92
+
93
+ ## Quick start
94
+
95
+ ### Scan local source files
96
+
97
+ ```bash
98
+ # Scan a directory (finds all .md / .rst / .html files recursively)
99
+ linksanity scan ./docs/
100
+
101
+ # Scan specific files or globs
102
+ linksanity scan README.md docs/**/*.md
103
+
104
+ # Validate anchor fragments too
105
+ linksanity scan ./docs/ --check-anchors
106
+
107
+ # Write JSON output; exit 1 if broken links found
108
+ linksanity scan ./docs/ --format json --output results.json
109
+
110
+ # Create a Markdown summary report
111
+ linksanity scan ./docs/ --report report.md
112
+
113
+ # Skip domains you don't control
114
+ echo "internal.corp.example.com" > ignore.txt
115
+ linksanity scan ./docs/ --ignore-domains ignore.txt
116
+ ```
117
+
118
+ ### Crawl a live site
119
+
120
+ ```bash
121
+ # Crawl up to 500 pages (default)
122
+ linksanity crawl https://docs.example.com
123
+
124
+ # Limit the number of pages crawled
125
+ linksanity crawl https://docs.example.com --max-pages 50
126
+
127
+ # Skip links to the domains listed in ignore.txt
128
+ linksanity crawl https://docs.example.com --ignore-domains ignore.txt
129
+ ```
130
+
131
+ ### CI integration
132
+
133
+ Add a link-check job that runs on every push to `main`, on every pull request targeting `main`, and on a weekly schedule.
134
+
135
+ ```yaml
136
+ # .github/workflows/linkcheck.yml
137
+ name: Link check
138
+
139
+ on:
140
+ push:
141
+ branches: [main]
142
+ pull_request:
143
+ branches: [main]
144
+ schedule:
145
+ - cron: "0 8 * * 1" # every Monday at 08:00 UTC
146
+
147
+ permissions:
148
+ contents: read
149
+
150
+ jobs:
151
+ linkcheck:
152
+ runs-on: ubuntu-latest
153
+ steps:
154
+ - uses: actions/checkout@v4
155
+
156
+ - uses: actions/setup-python@v5
157
+ with:
158
+ python-version: "3.11"
159
+ cache: pip
160
+
161
+ - name: Install linksanity
162
+ run: pip install linksanity
163
+
164
+ - name: Check links
165
+ run: |
166
+ linksanity scan ./docs/ \
167
+ --skip-urls .linksanity-skip \
168
+ --format json \
169
+ --output linkcheck.json
170
+
171
+ - name: Upload results
172
+ if: always()
173
+ uses: actions/upload-artifact@v4
174
+ with:
175
+ name: linkcheck-results
176
+ path: linkcheck.json
177
+ ```
178
+
179
+ **File-based skip list** — commit a `.linksanity-skip` file at your repo root to exclude auth-gated or staging URLs. Supports `*` wildcards:
180
+
181
+ ```
182
+ # .linksanity-skip
183
+ https://app.example.com/login
184
+ https://staging.example.com/*
185
+ https://internal.corp.example.com/*
186
+ ```
187
+
188
+ **Report broken links to a GitHub Issue** — useful for scheduled runs that find regressions after merge:
189
+
190
+ ```yaml
191
+ - name: Report broken links
192
+ if: failure()
193
+ env:
194
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
195
+ run: |
196
+ linksanity scan ./docs/ \
197
+ --github-issue \
198
+ --repo ${{ github.repository }}
199
+ ```
200
+
201
+ `GITHUB_TOKEN` is always read from the environment — never pass it as a CLI flag or store it in a file.
202
+
203
+ **Crawl a live docs site** — swap `scan` for `crawl` to test a deployed site:
204
+
205
+ ```yaml
206
+ - name: Crawl live docs
207
+ run: |
208
+ pip install "linksanity[browser]"
209
+ playwright install --with-deps chromium
210
+ linksanity crawl https://docs.example.com \
211
+ --max-pages 200 \
212
+ --block-analytics \
213
+ --format json \
214
+ --output crawl-results.json
215
+ ```
216
+
217
+ ### GitHub Issue reporting
218
+
219
+ Use `--github-issue` when you want broken links surfaced as a trackable GitHub Issue rather than just a failed CI run. It creates or updates a single `[linksanity]` issue listing every broken URL, so the team has a persistent record to triage — not just a red check mark that disappears on the next push.
220
+
221
+ **When to use it:**
222
+
223
+ - **Scheduled runs** — a weekly cron job catches link rot that crept in after your last merge. The issue stays open until you fix the links and the check goes green.
224
+ - **Repos without branch protection** — if broken links won't block a PR merge, an issue is the only signal that survives past the CI run.
225
+ - **Large docs sites** — when dozens of links break at once (e.g. a domain migration), a single issue is easier to triage than scrolling through CI logs.
226
+
227
+ **When you don't need it:**
228
+
229
+ - PRs where branch protection already blocks the merge on failure — a failed job is sufficient.
230
+ - Local runs and one-off checks.
231
+
232
+ **Setup:**
233
+
234
+ ```bash
235
+ export GITHUB_TOKEN=ghp_...
236
+ linksanity scan ./docs/ --github-issue --repo owner/repo
237
+ ```
238
+
239
+ `GITHUB_TOKEN` is read from the environment only — never pass it as a CLI flag or store it in a file. In GitHub Actions, use the built-in token:
240
+
241
+ ```yaml
242
+ env:
243
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
244
+ ```
245
+
246
+ The workflow job also needs `issues: write` permission:
247
+
248
+ ```yaml
249
+ permissions:
250
+ contents: read
251
+ issues: write
252
+ ```
253
+
254
+ ## Use with AI agents
255
+
256
+ linksanity is designed to be a clean tool call for AI agents. Use `--format json` so an agent can parse structured output without screen-scraping console text.
257
+
258
+ **Exit codes** are the primary signal:
259
+
260
+ | Code | Meaning |
261
+ |---|---|
262
+ | `0` | All links OK (or only redirects/skipped) |
263
+ | `1` | One or more broken links |
264
+ | `2` | Invocation error |
265
+
266
+ ### JSON output schema
267
+
268
+ ```bash
269
+ linksanity scan ./docs/ --format json --output results.json
270
+ ```
271
+
272
+ Each item in the output array has:
273
+
274
+ ```json
275
+ [
276
+ {
277
+ "url": "https://example.com/old",
278
+ "source_file": "docs/guide.md",
279
+ "line": 42,
280
+ "status": "broken",
281
+ "status_code": 404,
282
+ "redirect_url": null,
283
+ "error": null
284
+ }
285
+ ]
286
+ ```
287
+
288
+ `status` is one of `"ok"`, `"broken"`, `"redirect"`, `"skipped"`, or `"error"`.
289
+
290
+ ### Python subprocess usage
291
+
292
+ Use this when you want to drive linksanity from a Python script or agent — for example, to file tickets, send alerts, or trigger auto-repair after a scan. linksanity doesn't expose a public Python API, so `subprocess.run` is the correct integration point.
293
+
294
+ `result.returncode` is the fast path: check it before touching the file. If it's `2`, something went wrong with invocation — read `result.stderr` for the error message rather than trying to parse the output file.
295
+
296
+ ```python
297
+ import json
298
+ import subprocess
299
+
300
+ result = subprocess.run(
301
+ ["linksanity", "scan", "./docs/", "--format", "json", "--output", "results.json"],
302
+ capture_output=True, # results are written to results.json via --output; stderr carries error messages
303
+ text=True,
304
+ )
305
+
306
+ if result.returncode == 2:
307
+ raise RuntimeError(f"linksanity invocation error: {result.stderr.strip()}")
308
+
309
+ with open("results.json") as f:
310
+ links = json.load(f)
311
+
312
+ # result.returncode == 1 means broken links exist; iterate to act on them
313
+ broken = [r for r in links if r["status"] == "broken"]
314
+ ```
315
+
316
+ ### MCP tool definition
317
+
318
+ Register linksanity as a tool so an AI agent can call it on demand:
319
+
320
+ ```json
321
+ {
322
+ "name": "check_links",
323
+ "description": "Scan documentation files for broken links. Returns structured JSON. Exit code 1 means broken links were found.",
324
+ "inputSchema": {
325
+ "type": "object",
326
+ "properties": {
327
+ "paths": {
328
+ "type": "array",
329
+ "items": { "type": "string" },
330
+ "description": "Files or directories to scan"
331
+ },
332
+ "skip_urls_file": {
333
+ "type": "string",
334
+ "description": "Path to a file listing URLs to skip (optional)"
335
+ }
336
+ },
337
+ "required": ["paths"]
338
+ }
339
+ }
340
+ ```
341
+
342
+ Invoke it in your MCP server by shelling out to `linksanity scan <paths> --format json --output /tmp/results.json` and returning the parsed JSON.
343
+
344
+ ### Claude Code / claude-code tool call
345
+
346
+ If you use Claude Code, you can invoke linksanity directly from the Claude CLI:
347
+
348
+ ```
349
+ ! linksanity scan ./docs/ --format json --output results.json
350
+ ```
351
+
352
+ Then ask Claude to interpret the output:
353
+
354
+ ```
355
+ Read results.json and summarise which links are broken and why they might have rotted.
356
+ ```
357
+
358
+ ## Options
359
+
360
+ ### `linksanity scan <paths...>`
361
+
362
+ | Flag | Default | Description |
363
+ |---|---|---|
364
+ | `--workers N` | 5 | Max concurrent HTTP checks |
365
+ | `--timeout N` | 10 | Per-request timeout (seconds) |
366
+ | `--retry N` | 2 | Retries on 429/503 |
367
+ | `--check-anchors` | off | Validate `#fragment` links |
368
+ | `--ignore-domains FILE` | — | One domain per line to skip |
369
+ | `--js-domains FILE` | — | Domains to check via Playwright |
370
+ | `--skip-urls FILE` | — | URLs/patterns to skip (one per line, `*` wildcards ok) |
371
+ | `--format` | console | `console`, `json`, or `csv` |
372
+ | `--output FILE` | stdout | Write results to file |
373
+ | `--report FILE` | — | Write Markdown summary to file |
374
+ | `--github-issue` | off | Open/update a GitHub Issue |
375
+ | `--repo OWNER/REPO` | — | Required with `--github-issue` |
376
+ | `--config FILE` | auto | Path to `linksanity.toml` |
377
+
378
+ ### `linksanity crawl <url>`
379
+
380
+ Same flags as `scan`, minus `--check-anchors` and `--js-domains`, plus:
381
+
382
+ | Flag | Default | Description |
383
+ |---|---|---|
384
+ | `--max-pages N` | 500 | Stop after N pages crawled |
385
+ | `--playwright-workers N` | 2 | Max concurrent browser sessions |
386
+ | `--skip-urls FILE` | — | URLs/patterns to skip (one per line, `*` wildcards ok) |
387
+ | `--block-analytics` | off | Block analytics/tracking domains in the browser |
388
+
389
+ ## Configuration file
390
+
391
+ Place a `linksanity.toml` in your project root (auto-discovered):
392
+
393
+ ```toml
394
+ workers = 10
395
+ timeout = 15
396
+ retry = 3
397
+ check_anchors = false
398
+ max_pages = 200
399
+ block_analytics = true
400
+
401
+ ignore_domains = ["status.example.com", "internal.example.com"]
402
+ js_domains = ["spa.example.com"]
403
+ skip_urls = [
404
+ "https://app.example.com/login",
405
+ "https://staging.example.com/*",
406
+ ]
407
+ ```
408
+
409
+ ## Exit codes
410
+
411
+ | Code | Meaning |
412
+ |---|---|
413
+ | `0` | All links OK (or only redirects/skipped) |
414
+ | `1` | One or more broken links |
415
+ | `2` | Invocation error (bad arguments, missing file) |
416
+
417
+ ## Development
418
+
419
+ ```bash
420
+ git clone https://github.com/ya8282/linksanity
421
+ cd linksanity
422
+ python -m venv .venv && source .venv/bin/activate
423
+ pip install -e ".[dev,browser]"
424
+ playwright install chromium
425
+
426
+ # Run tests
427
+ pytest
428
+
429
+ # Lint + type check
430
+ ruff check linksanity/ tests/
431
+ mypy linksanity/
432
+ ```
433
+
434
+ ## License
435
+
436
+ MIT