oryon-score 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oryon_score-0.1.0/.gitignore +15 -0
- oryon_score-0.1.0/LICENSE +21 -0
- oryon_score-0.1.0/PKG-INFO +236 -0
- oryon_score-0.1.0/README.md +211 -0
- oryon_score-0.1.0/oryon_score/__init__.py +4 -0
- oryon_score-0.1.0/oryon_score/cli.py +120 -0
- oryon_score-0.1.0/oryon_score/score.py +722 -0
- oryon_score-0.1.0/pyproject.toml +47 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Amaury / SEOryon
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oryon-score
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Score any URL for AI search readiness. Free open-source tool by Oryon.
|
|
5
|
+
Project-URL: Homepage, https://seoryon.com
|
|
6
|
+
Project-URL: Repository, https://github.com/SEOryon/oryon-score
|
|
7
|
+
Project-URL: Issues, https://github.com/SEOryon/oryon-score/issues
|
|
8
|
+
Author-email: Amaury <amaury@seoryon.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: aeo,ai-overview,ai-search,audit,geo,llm-citation,schema,seo
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
22
|
+
Requires-Dist: httpx>=0.27
|
|
23
|
+
Requires-Dist: lxml>=5.0
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# Oryon AI Search Readiness Score
|
|
27
|
+
|
|
28
|
+
> Score any URL for AI search readiness. Free, open-source, no signup.
|
|
29
|
+
> Try it live → **[score.seoryon.com](https://score.seoryon.com)**
|
|
30
|
+
|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
[PyPI: oryon-score](https://pypi.org/project/oryon-score/)
|
|
33
|
+
[GitHub: SEOryon/oryon-score](https://github.com/SEOryon/oryon-score)
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## What this is
|
|
38
|
+
|
|
39
|
+
A free tool that scores any URL on **27 signals** AI search engines use to decide what to cite, and returns the top fixes — ranked by impact.
|
|
40
|
+
|
|
41
|
+
Built and maintained by **[Oryon](https://seoryon.com)** — the SEO engine that writes and publishes articles built to rank AND get cited. This is the demo. Your whole site is the product.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
$ oryon-score https://example.com/blog/ai-overview-guide
|
|
45
|
+
|
|
46
|
+
Oryon AI Search Readiness Score
|
|
47
|
+
https://example.com/blog/ai-overview-guide
|
|
48
|
+
The AI Overview Guide: How to Get Cited by Google AI
|
|
49
|
+
|
|
50
|
+
62/100 · Grade C
|
|
51
|
+
|
|
52
|
+
By bucket
|
|
53
|
+
Schema Structure ████████████████░░░░░░░░ 18.6/30
|
|
54
|
+
Content Format █████████████████░░░░░░░ 17.0/25
|
|
55
|
+
Authority ████████████░░░░░░░░░░░░ 12.0/20
|
|
56
|
+
Crawlability ███████████████████░░░░░ 11.4/15
|
|
57
|
+
Freshness ███░░░░░░░░░░░░░░░░░░░░░ 3.0/10
|
|
58
|
+
|
|
59
|
+
Top fixes (in order of impact)
|
|
60
|
+
✗ FAQ schema (No FAQPage schema.)
|
|
61
|
+
→ Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal.
|
|
62
|
+
✗ TL;DR / summary block near top (No TL;DR or summary block found.)
|
|
63
|
+
→ Add a 50-word TL;DR after the H1. AI summarizers lift these at much higher rates.
|
|
64
|
+
✗ Last modified date (No modification date detected.)
|
|
65
|
+
→ Expose a dateModified in JSON-LD or via Last-Modified header.
|
|
66
|
+
...
|
|
67
|
+
|
|
68
|
+
Want continuous scoring across every page on your site?
|
|
69
|
+
→ Try Oryon free for 3 days: seoryon.com
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Why this exists
|
|
75
|
+
|
|
76
|
+
The market for AI citation tracking is dominated by dashboards starting at **€295/mo**. Most SEO teams just need to answer one question: *"is my page set up to get cited at all?"* That answer should be free.
|
|
77
|
+
|
|
78
|
+
This tool gives you that answer in 10 seconds. No signup. No tokens. Just paste a URL.
|
|
79
|
+
|
|
80
|
+
If you want **continuous scoring across every URL on your site**, plus AI citation tracking across ChatGPT / Perplexity / Gemini / Google AI, plus a writer that ships extractable articles automatically — that's [Oryon](https://seoryon.com). From €39/mo.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## What it actually checks
|
|
85
|
+
|
|
86
|
+
27 signals across 5 buckets that AI Overviews + LLM citation systems actually weight:
|
|
87
|
+
|
|
88
|
+
### Schema & structure (30 pts)
|
|
89
|
+
- Article / BlogPosting JSON-LD
|
|
90
|
+
- **FAQPage schema** (highest-correlation signal)
|
|
91
|
+
- HowTo schema
|
|
92
|
+
- BreadcrumbList schema
|
|
93
|
+
- Heading hierarchy (1 H1, ≥3 H2s)
|
|
94
|
+
- Definition lists (`<dl>`)
|
|
95
|
+
- Table markup
|
|
96
|
+
- Question-style H2s
|
|
97
|
+
|
|
98
|
+
### Content / format (25 pts)
|
|
99
|
+
- Word count in the 1200–3500 sweet spot
|
|
100
|
+
- Direct answer in the first 60 words
|
|
101
|
+
- TL;DR / summary block near the top
|
|
102
|
+
- Bold emphasis in the first section
|
|
103
|
+
- 3+ structured lists
|
|
104
|
+
|
|
105
|
+
### Authority (20 pts)
|
|
106
|
+
- Outbound links to .gov / .edu / Wikipedia
|
|
107
|
+
- 5–50 internal links (healthy range)
|
|
108
|
+
- Named author / byline (E-E-A-T)
|
|
109
|
+
- Outbound link density (3–30)
|
|
110
|
+
- `<blockquote>` / `<cite>` markup
|
|
111
|
+
|
|
112
|
+
### Crawlability (15 pts)
|
|
113
|
+
- HTTPS
|
|
114
|
+
- Canonical URL
|
|
115
|
+
- Mobile viewport
|
|
116
|
+
- Open Graph (≥3 og:* tags)
|
|
117
|
+
- `llms.txt` at site root
|
|
118
|
+
- `robots.txt` allows GPTBot, ClaudeBot, PerplexityBot, CCBot, Google-Extended, etc.
|
|
119
|
+
|
|
120
|
+
### Freshness (10 pts)
|
|
121
|
+
- `dateModified` or `Last-Modified` header
|
|
122
|
+
- Dated phrases in body ("as of May 2026")
|
|
123
|
+
- Year in title
|
|
124
|
+
|
|
125
|
+
Each signal has a **specific fix** — what to change, where to change it, why it matters. No fluff.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Install
|
|
130
|
+
|
|
131
|
+
### Web (no install)
|
|
132
|
+
Just open **[score.seoryon.com](https://score.seoryon.com)** and paste a URL.
|
|
133
|
+
|
|
134
|
+
### CLI (pip)
|
|
135
|
+
```bash
|
|
136
|
+
pip install oryon-score
|
|
137
|
+
|
|
138
|
+
oryon-score https://example.com/blog/your-best-article
|
|
139
|
+
oryon-score https://example.com --json
|
|
140
|
+
oryon-score https://example.com --out report.json
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Requires Python 3.10+.
|
|
144
|
+
|
|
145
|
+
### From source
|
|
146
|
+
```bash
|
|
147
|
+
git clone https://github.com/SEOryon/oryon-score
|
|
148
|
+
cd oryon-score
|
|
149
|
+
pip install -e .
|
|
150
|
+
|
|
151
|
+
oryon-score https://example.com
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Use in Python
|
|
155
|
+
```python
|
|
156
|
+
from oryon_score import score_url
|
|
157
|
+
|
|
158
|
+
result = score_url("https://example.com/blog/post")
|
|
159
|
+
print(result.score, result.grade)
|
|
160
|
+
for fix in result.fixes:
|
|
161
|
+
print(" -", fix)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Deploy your own (Vercel)
|
|
167
|
+
|
|
168
|
+
The `web/` + `api/` folders deploy as a Vercel project. Two files matter:
|
|
169
|
+
|
|
170
|
+
- `web/index.html` — the public scoring page
|
|
171
|
+
- `api/score.py` — the Python serverless endpoint
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
npm i -g vercel
|
|
175
|
+
vercel
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Done. Your fork is now live at `your-project.vercel.app`. Add a custom domain in Vercel's dashboard.
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Sample output
|
|
183
|
+
|
|
184
|
+
See [`examples/example_output.json`](examples/example_output.json) for the full JSON shape returned by the API and the `--json` flag.
|
|
185
|
+
|
|
186
|
+
The web UI renders the same data with bucket bars, top fixes, and a "what's working" passlist.
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Why some sites score lower than they "should"
|
|
191
|
+
|
|
192
|
+
A score is not a verdict. It's a snapshot of *extractable* signals on the page itself. Three things this tool **does not** measure:
|
|
193
|
+
|
|
194
|
+
1. **Domain authority / backlink graph.** Out of scope. AI citation correlates with authority, but measuring it requires a third-party API and we kept this free.
|
|
195
|
+
2. **Whether the page is actually cited today.** Use the [Citation Intelligence MCP](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab for live LLM citation data.
|
|
196
|
+
3. **Brand authority signals.** Wikipedia mentions, press coverage, Reddit references — they matter, and the tool flags some, but it can't grade them holistically.
|
|
197
|
+
|
|
198
|
+
For all three layers, see [Oryon](https://seoryon.com) — that's what the paid product does.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## How it stays free
|
|
203
|
+
|
|
204
|
+
- Runs on Vercel's free Python runtime
|
|
205
|
+
- No third-party APIs, no API keys to maintain
|
|
206
|
+
- No tracking, no telemetry, no user accounts
|
|
207
|
+
- Self-hostable in one click
|
|
208
|
+
|
|
209
|
+
Open source under MIT. Fork it, run it on your own infra, change it, ship it.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Inspired by
|
|
214
|
+
|
|
215
|
+
This tool's structured-signal approach was inspired by [`citation-intelligence`](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab — a self-hosted MCP server for measuring LLM citation visibility. Go check it out if you want programmatic citation data from inside Claude Code or Cursor. This tool solves a different layer (page readiness, not live citation queries) and is original work.
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Contributing
|
|
220
|
+
|
|
221
|
+
PRs welcome. Especially:
|
|
222
|
+
- New signals (anything with a published correlation study)
|
|
223
|
+
- Translations of the web UI
|
|
224
|
+
- WordPress / Webflow / Shopify integrations
|
|
225
|
+
- A GitHub Action that scores changed URLs on every PR
|
|
226
|
+
|
|
227
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## License
|
|
232
|
+
|
|
233
|
+
MIT — see [LICENSE](LICENSE).
|
|
234
|
+
|
|
235
|
+
Built by **[Oryon](https://seoryon.com)** · Your Organic Growth Engine.
|
|
236
|
+
Follow [@SEOryon](https://instagram.com/SEOryon) for SEO content that doesn't lie.
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# Oryon AI Search Readiness Score
|
|
2
|
+
|
|
3
|
+
> Score any URL for AI search readiness. Free, open-source, no signup.
|
|
4
|
+
> Try it live → **[score.seoryon.com](https://score.seoryon.com)**
|
|
5
|
+
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[PyPI: oryon-score](https://pypi.org/project/oryon-score/)
|
|
8
|
+
[GitHub: SEOryon/oryon-score](https://github.com/SEOryon/oryon-score)
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## What this is
|
|
13
|
+
|
|
14
|
+
A free tool that scores any URL on **27 signals** AI search engines use to decide what to cite, and returns the top fixes — ranked by impact.
|
|
15
|
+
|
|
16
|
+
Built and maintained by **[Oryon](https://seoryon.com)** — the SEO engine that writes and publishes articles built to rank AND get cited. This is the demo. Your whole site is the product.
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
$ oryon-score https://example.com/blog/ai-overview-guide
|
|
20
|
+
|
|
21
|
+
Oryon AI Search Readiness Score
|
|
22
|
+
https://example.com/blog/ai-overview-guide
|
|
23
|
+
The AI Overview Guide: How to Get Cited by Google AI
|
|
24
|
+
|
|
25
|
+
62/100 · Grade C
|
|
26
|
+
|
|
27
|
+
By bucket
|
|
28
|
+
Schema Structure ████████████████░░░░░░░░ 18.6/30
|
|
29
|
+
Content Format █████████████████░░░░░░░ 17.0/25
|
|
30
|
+
Authority ████████████░░░░░░░░░░░░ 12.0/20
|
|
31
|
+
Crawlability ███████████████████░░░░░ 11.4/15
|
|
32
|
+
Freshness ███░░░░░░░░░░░░░░░░░░░░░ 3.0/10
|
|
33
|
+
|
|
34
|
+
Top fixes (in order of impact)
|
|
35
|
+
✗ FAQ schema (No FAQPage schema.)
|
|
36
|
+
→ Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal.
|
|
37
|
+
✗ TL;DR / summary block near top (No TL;DR or summary block found.)
|
|
38
|
+
→ Add a 50-word TL;DR after the H1. AI summarizers lift these at much higher rates.
|
|
39
|
+
✗ Last modified date (No modification date detected.)
|
|
40
|
+
→ Expose a dateModified in JSON-LD or via Last-Modified header.
|
|
41
|
+
...
|
|
42
|
+
|
|
43
|
+
Want continuous scoring across every page on your site?
|
|
44
|
+
→ Try Oryon free for 3 days: seoryon.com
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Why this exists
|
|
50
|
+
|
|
51
|
+
The market for AI citation tracking is dominated by dashboards starting at **€295/mo**. Most SEO teams just need to answer one question: *"is my page set up to get cited at all?"* That answer should be free.
|
|
52
|
+
|
|
53
|
+
This tool gives you that answer in 10 seconds. No signup. No tokens. Just paste a URL.
|
|
54
|
+
|
|
55
|
+
If you want **continuous scoring across every URL on your site**, plus AI citation tracking across ChatGPT / Perplexity / Gemini / Google AI, plus a writer that ships extractable articles automatically — that's [Oryon](https://seoryon.com). From €39/mo.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## What it actually checks
|
|
60
|
+
|
|
61
|
+
27 signals across 5 buckets that AI Overviews + LLM citation systems actually weight:
|
|
62
|
+
|
|
63
|
+
### Schema & structure (30 pts)
|
|
64
|
+
- Article / BlogPosting JSON-LD
|
|
65
|
+
- **FAQPage schema** (highest-correlation signal)
|
|
66
|
+
- HowTo schema
|
|
67
|
+
- BreadcrumbList schema
|
|
68
|
+
- Heading hierarchy (1 H1, ≥3 H2s)
|
|
69
|
+
- Definition lists (`<dl>`)
|
|
70
|
+
- Table markup
|
|
71
|
+
- Question-style H2s
|
|
72
|
+
|
|
73
|
+
### Content / format (25 pts)
|
|
74
|
+
- Word count in the 1200–3500 sweet spot
|
|
75
|
+
- Direct answer in the first 60 words
|
|
76
|
+
- TL;DR / summary block near the top
|
|
77
|
+
- Bold emphasis in the first section
|
|
78
|
+
- 3+ structured lists
|
|
79
|
+
|
|
80
|
+
### Authority (20 pts)
|
|
81
|
+
- Outbound links to .gov / .edu / Wikipedia
|
|
82
|
+
- 5–50 internal links (healthy range)
|
|
83
|
+
- Named author / byline (E-E-A-T)
|
|
84
|
+
- Outbound link density (3–30)
|
|
85
|
+
- `<blockquote>` / `<cite>` markup
|
|
86
|
+
|
|
87
|
+
### Crawlability (15 pts)
|
|
88
|
+
- HTTPS
|
|
89
|
+
- Canonical URL
|
|
90
|
+
- Mobile viewport
|
|
91
|
+
- Open Graph (≥3 og:* tags)
|
|
92
|
+
- `llms.txt` at site root
|
|
93
|
+
- `robots.txt` allows GPTBot, ClaudeBot, PerplexityBot, CCBot, Google-Extended, etc.
|
|
94
|
+
|
|
95
|
+
### Freshness (10 pts)
|
|
96
|
+
- `dateModified` or `Last-Modified` header
|
|
97
|
+
- Dated phrases in body ("as of May 2026")
|
|
98
|
+
- Year in title
|
|
99
|
+
|
|
100
|
+
Each signal has a **specific fix** — what to change, where to change it, why it matters. No fluff.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Install
|
|
105
|
+
|
|
106
|
+
### Web (no install)
|
|
107
|
+
Just open **[score.seoryon.com](https://score.seoryon.com)** and paste a URL.
|
|
108
|
+
|
|
109
|
+
### CLI (pip)
|
|
110
|
+
```bash
|
|
111
|
+
pip install oryon-score
|
|
112
|
+
|
|
113
|
+
oryon-score https://example.com/blog/your-best-article
|
|
114
|
+
oryon-score https://example.com --json
|
|
115
|
+
oryon-score https://example.com --out report.json
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Requires Python 3.10+.
|
|
119
|
+
|
|
120
|
+
### From source
|
|
121
|
+
```bash
|
|
122
|
+
git clone https://github.com/SEOryon/oryon-score
|
|
123
|
+
cd oryon-score
|
|
124
|
+
pip install -e .
|
|
125
|
+
|
|
126
|
+
oryon-score https://example.com
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Use in Python
|
|
130
|
+
```python
|
|
131
|
+
from oryon_score import score_url
|
|
132
|
+
|
|
133
|
+
result = score_url("https://example.com/blog/post")
|
|
134
|
+
print(result.score, result.grade)
|
|
135
|
+
for fix in result.fixes:
|
|
136
|
+
print(" -", fix)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Deploy your own (Vercel)
|
|
142
|
+
|
|
143
|
+
The `web/` + `api/` folders deploy as a Vercel project. Two files matter:
|
|
144
|
+
|
|
145
|
+
- `web/index.html` — the public scoring page
|
|
146
|
+
- `api/score.py` — the Python serverless endpoint
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
npm i -g vercel
|
|
150
|
+
vercel
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Done. Your fork is now live at `your-project.vercel.app`. Add a custom domain in Vercel's dashboard.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Sample output
|
|
158
|
+
|
|
159
|
+
See [`examples/example_output.json`](examples/example_output.json) for the full JSON shape returned by the API and the `--json` flag.
|
|
160
|
+
|
|
161
|
+
The web UI renders the same data with bucket bars, top fixes, and a "what's working" passlist.
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Why some sites score lower than they "should"
|
|
166
|
+
|
|
167
|
+
A score is not a verdict. It's a snapshot of *extractable* signals on the page itself. Three things this tool **does not** measure:
|
|
168
|
+
|
|
169
|
+
1. **Domain authority / backlink graph.** Out of scope. AI citation correlates with authority, but measuring it requires a third-party API and we kept this free.
|
|
170
|
+
2. **Whether the page is actually cited today.** Use the [Citation Intelligence MCP](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab for live LLM citation data.
|
|
171
|
+
3. **Brand authority signals.** Wikipedia mentions, press coverage, Reddit references — they matter, and the tool flags some, but it can't grade them holistically.
|
|
172
|
+
|
|
173
|
+
For all three layers, see [Oryon](https://seoryon.com) — that's what the paid product does.
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## How it stays free
|
|
178
|
+
|
|
179
|
+
- Runs on Vercel's free Python runtime
|
|
180
|
+
- No third-party APIs, no API keys to maintain
|
|
181
|
+
- No tracking, no telemetry, no user accounts
|
|
182
|
+
- Self-hostable in one click
|
|
183
|
+
|
|
184
|
+
Open source under MIT. Fork it, run it on your own infra, change it, ship it.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Inspired by
|
|
189
|
+
|
|
190
|
+
This tool's structured-signal approach was inspired by [`citation-intelligence`](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab — a self-hosted MCP server for measuring LLM citation visibility. Go check it out if you want programmatic citation data from inside Claude Code or Cursor. This tool solves a different layer (page readiness, not live citation queries) and is original work.
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Contributing
|
|
195
|
+
|
|
196
|
+
PRs welcome. Especially:
|
|
197
|
+
- New signals (anything with a published correlation study)
|
|
198
|
+
- Translations of the web UI
|
|
199
|
+
- WordPress / Webflow / Shopify integrations
|
|
200
|
+
- A GitHub Action that scores changed URLs on every PR
|
|
201
|
+
|
|
202
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT — see [LICENSE](LICENSE).
|
|
209
|
+
|
|
210
|
+
Built by **[Oryon](https://seoryon.com)** · Your Organic Growth Engine.
|
|
211
|
+
Follow [@SEOryon](https://instagram.com/SEOryon) for SEO content that doesn't lie.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
oryon-score CLI — score any URL for AI search readiness.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
oryon-score https://example.com
|
|
6
|
+
oryon-score https://example.com --json
|
|
7
|
+
oryon-score https://example.com --out report.json
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
|
|
15
|
+
from .score import score_url
|
|
16
|
+
|
|
17
|
+
# ANSI colors — keep it minimal, fall back gracefully
|
|
18
|
+
def _supports_color() -> bool:
|
|
19
|
+
return sys.stdout.isatty() and not bool(__import__("os").environ.get("NO_COLOR"))
|
|
20
|
+
|
|
21
|
+
# Color name -> ANSI escape sequence used by the report printer.
C = {
    "reset": "\033[0m",
    "bold": "\033[1m",
    "dim": "\033[2m",
    "violet": "\033[38;5;99m",
    "green": "\033[32m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "gray": "\033[90m",
}
if not _supports_color():
    # Keep every key but blank the codes, so callers can always interpolate
    # C[...] into output without checking whether color is enabled.
    C = dict.fromkeys(C, "")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _bar(percent: float, width: int = 24) -> str:
|
|
34
|
+
filled = int(round(percent / 100 * width))
|
|
35
|
+
return f"{C['violet']}{'█' * filled}{C['gray']}{'·' * (width - filled)}{C['reset']}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _print_report(result):
|
|
39
|
+
print()
|
|
40
|
+
print(f" {C['bold']}Oryon AI Search Readiness Score{C['reset']}")
|
|
41
|
+
print(f" {C['gray']}{result.url}{C['reset']}")
|
|
42
|
+
if result.page_title:
|
|
43
|
+
print(f" {C['dim']}{result.page_title[:80]}{C['reset']}")
|
|
44
|
+
print()
|
|
45
|
+
|
|
46
|
+
# Fetch failed — show that clearly instead of a fake zero score
|
|
47
|
+
if result.grade == "—" or (result.score == 0 and not result.signals):
|
|
48
|
+
msg = result.notes[0] if result.notes else "Fetch failed."
|
|
49
|
+
print(f" {C['red']}{C['bold']}✗ Could not score this URL{C['reset']}")
|
|
50
|
+
print(f" {C['gray']}{msg}{C['reset']}")
|
|
51
|
+
print()
|
|
52
|
+
print(f" {C['dim']}Common causes: bot protection (Cloudflare), paywall, JS-only site,{C['reset']}")
|
|
53
|
+
print(f" {C['dim']}or the page requires login. Try a different URL on the same domain.{C['reset']}")
|
|
54
|
+
print()
|
|
55
|
+
return
|
|
56
|
+
|
|
57
|
+
grade_color = C["green"] if result.score >= 70 else (C["yellow"] if result.score >= 50 else C["red"])
|
|
58
|
+
print(f" {grade_color}{C['bold']}{result.score}/100 · Grade {result.grade}{C['reset']}")
|
|
59
|
+
print()
|
|
60
|
+
|
|
61
|
+
print(f" {C['bold']}By bucket{C['reset']}")
|
|
62
|
+
for bucket, info in result.bucket_scores.items():
|
|
63
|
+
name = bucket.replace("_", " ").title()
|
|
64
|
+
bar = _bar(info["percent"])
|
|
65
|
+
print(f" {name:<22} {bar} {info['earned']:>4}/{int(info['max']):<3}")
|
|
66
|
+
print()
|
|
67
|
+
|
|
68
|
+
passed = [s for s in result.signals if s.passed]
|
|
69
|
+
failed = [s for s in result.signals if not s.passed]
|
|
70
|
+
|
|
71
|
+
if failed:
|
|
72
|
+
print(f" {C['bold']}Top fixes{C['reset']} {C['gray']}(in order of impact){C['reset']}")
|
|
73
|
+
for s in sorted(failed, key=lambda s: -s.weight)[:8]:
|
|
74
|
+
print(f" {C['red']}✗{C['reset']} {C['bold']}{s.name}{C['reset']} {C['gray']}({s.detail}){C['reset']}")
|
|
75
|
+
if s.fix:
|
|
76
|
+
print(f" {C['gray']}→{C['reset']} {s.fix}")
|
|
77
|
+
print()
|
|
78
|
+
|
|
79
|
+
if passed:
|
|
80
|
+
print(f" {C['bold']}What's working{C['reset']}")
|
|
81
|
+
for s in passed[:6]:
|
|
82
|
+
print(f" {C['green']}✓{C['reset']} {s.name} {C['gray']}— {s.detail}{C['reset']}")
|
|
83
|
+
print()
|
|
84
|
+
|
|
85
|
+
print(f" {C['dim']}Want continuous scoring across every page on your site?{C['reset']}")
|
|
86
|
+
print(f" {C['violet']}→ Try Oryon free for 3 days: seoryon.com{C['reset']}")
|
|
87
|
+
print()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def main() -> int:
    """Run the ``oryon-score`` command-line interface.

    Parses the command line, scores the requested URL, optionally writes the
    JSON report to the ``--out`` file, and prints either the JSON document
    (``--json``) or the human-readable report.

    Returns:
        The process exit code: 2 when scoring raised an exception, 0 when the
        score is at least 50, and 1 otherwise.
    """
    p = argparse.ArgumentParser(
        prog="oryon-score",
        description="Score any URL for AI search readiness. Inspired by Oryon.",
    )
    p.add_argument("url", help="The URL to score (with or without https://)")
    p.add_argument("--json", action="store_true", help="Output JSON only, no human format")
    p.add_argument("--out", help="Write JSON report to file")
    args = p.parse_args()

    try:
        result = score_url(args.url)
    except Exception as e:  # top-level boundary: report the error and exit non-zero
        print(f"Error: {e}", file=sys.stderr)
        return 2

    if args.out:
        with open(args.out, "w", encoding="utf-8") as f:
            json.dump(result.to_dict(), f, indent=2)
        # --json promises JSON-only output on stdout, so in that mode the
        # confirmation goes to stderr and the stream stays machine-parseable.
        confirmation_stream = sys.stderr if args.json else sys.stdout
        print(f"Saved report to {args.out}", file=confirmation_stream)

    if args.json:
        print(json.dumps(result.to_dict(), indent=2))
    else:
        _print_report(result)

    return 0 if result.score >= 50 else 1
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Oryon AI Search Readiness Score
|
|
3
|
+
Core scoring engine. Takes a URL, returns 0-100 score + per-signal results + fixes.
|
|
4
|
+
|
|
5
|
+
No LLM calls. No API keys. Pure HTML parsing + signal heuristics.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
import time
|
|
12
|
+
from dataclasses import dataclass, field, asdict
|
|
13
|
+
from typing import Any
|
|
14
|
+
from urllib.parse import urljoin, urlparse
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
from bs4 import BeautifulSoup
|
|
18
|
+
|
|
19
|
+
# Browser-like User-Agent to get past basic bot walls (Cloudflare, etc.).
# The tool still identifies itself: the string ends with an
# "OryonAISearchScore" product token, and _fetch also sends an X-Tool header.
UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 "
    "OryonAISearchScore/1.0 (+https://seoryon.com)"
)

# Timeout passed to the HTTP client, in seconds.
TIMEOUT_S = 15.0

# User-agent names of the AI crawlers that robots.txt is checked against.
AI_CRAWLERS = [
    "GPTBot", "ClaudeBot", "PerplexityBot",
    "CCBot", "Google-Extended", "Applebot-Extended",
    "Bytespider", "anthropic-ai", "FacebookBot",
]

# Maximum points per bucket; the five buckets add up to 100.
WEIGHTS = {
    "schema_structure": 30,
    "content_format": 25,
    "authority": 20,
    "crawlability": 15,
    "freshness": 10,
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class SignalResult:
    """Outcome of evaluating a single readiness signal on a page."""

    # Human-readable signal name shown in reports.
    name: str
    # Key of the scoring bucket this signal belongs to.
    bucket: str
    # Whether the page satisfied the signal.
    passed: bool
    # Points possible if the signal is binary; otherwise its weight share
    # within the bucket.
    weight: float
    # Points actually earned (0..weight).
    points: float
    # Short explanation of what was found.
    detail: str
    # One-line actionable fix when the signal failed; None otherwise.
    fix: str | None = None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class ScoreResult:
    """Complete outcome of scoring one URL."""

    url: str
    score: int  # overall score, 0-100
    grade: str  # A+ A B C D F (the CLI also treats "—" as "could not score")
    fetched_at: str
    page_title: str | None
    # Per-bucket totals keyed by bucket name; the CLI reads the "percent",
    # "earned" and "max" entries of each inner dict.
    bucket_scores: dict[str, dict[str, float]] = field(default_factory=dict)
    signals: list[SignalResult] = field(default_factory=list)
    fixes: list[str] = field(default_factory=list)
    notes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the result, including nested signals, as plain dicts and lists."""
        # asdict() recurses into dataclass instances held in lists, so every
        # SignalResult in ``signals`` is already converted to a dict here.
        return asdict(self)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _grade(score: int) -> str:
|
|
82
|
+
if score >= 90: return "A+"
|
|
83
|
+
if score >= 80: return "A"
|
|
84
|
+
if score >= 70: return "B"
|
|
85
|
+
if score >= 60: return "C"
|
|
86
|
+
if score >= 45: return "D"
|
|
87
|
+
return "F"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _norm_url(url: str) -> str:
|
|
91
|
+
if not url.startswith(("http://", "https://")):
|
|
92
|
+
url = "https://" + url
|
|
93
|
+
return url
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _fetch(url: str) -> tuple[httpx.Response | None, str | None]:
    """GET ``url``, following redirects, and classify the outcome.

    Returns:
        ``(response, None)`` when the final status code is below 400, or
        ``(None, message)`` with a human-readable explanation when the
        request could not be made or the status code is 400 or higher.
    """
    # Accept-Encoding is deliberately left to httpx, which advertises only
    # the encodings it can decode. Hard-coding "br" would invite
    # Brotli-compressed bodies that cannot be decoded unless the optional
    # brotli package happens to be installed.
    headers = {
        "User-Agent": UA,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "X-Tool": "OryonAISearchScore/1.0",
    }
    try:
        with httpx.Client(
            timeout=TIMEOUT_S, follow_redirects=True, headers=headers
        ) as client:
            r = client.get(url)
    except (httpx.HTTPError, httpx.InvalidURL) as e:
        # InvalidURL does not derive from HTTPError, so it is listed
        # explicitly to keep malformed URLs on the (None, message) path.
        return None, f"Fetch failed: {e!s}"

    sc = r.status_code
    if sc < 400:
        return r, None

    if sc == 404:
        msg = "HTTP 404 — that URL doesn't exist. Double-check the path."
    elif sc in (401, 403):
        msg = f"HTTP {sc} — the site blocked our request (bot protection / paywall / login required)."
    elif sc == 429:
        msg = "HTTP 429 — rate-limited. Wait a minute and try again."
    elif sc >= 500:
        msg = f"HTTP {sc} — the site is broken right now. Try again later."
    else:
        msg = f"HTTP {sc} {r.reason_phrase}"
    return None, msg
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _fetch_text(url: str) -> str:
    """Return the response body of ``url`` as text, or ``""`` on any failure."""
    response, _error = _fetch(url)
    fetched_ok = response is not None and response.status_code < 400
    return response.text if fetched_ok else ""
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ============ SIGNAL CHECKS ============
|
|
137
|
+
|
|
138
|
+
def _check_https(parsed_url) -> SignalResult:
    """Crawlability signal (2 points): the URL's scheme is ``https``.

    ``parsed_url`` only needs a ``scheme`` attribute.
    """
    if parsed_url.scheme == "https":
        return SignalResult(
            "HTTPS", "crawlability", True, 2, 2,
            "Served over HTTPS.",
            None,
        )
    return SignalResult(
        "HTTPS", "crawlability", False, 2, 0,
        "Page is not HTTPS.",
        "Move the site to HTTPS — AI crawlers and Google demote http URLs.",
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _check_canonical(soup: BeautifulSoup, url: str) -> SignalResult:
    """Crawlability signal (weight 2): pass when a canonical link with an href exists.

    ``url`` is part of the signature but is not consulted by this check.
    """
    link = soup.find("link", attrs={"rel": "canonical"})
    href = link.get("href") if link else None
    if href:
        return SignalResult(
            "Canonical URL", "crawlability", True, 2, 2,
            f"Canonical: {link['href']}", None,
        )
    return SignalResult(
        "Canonical URL", "crawlability", False, 2, 0,
        "No canonical link tag found.",
        "Add a `<link rel=\"canonical\" href=\"...\">` to lock the canonical URL for this page.",
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _check_viewport(soup: BeautifulSoup) -> SignalResult:
    """Crawlability signal (weight 2): pass when a viewport meta tag has content."""
    meta = soup.find("meta", attrs={"name": "viewport"})
    if meta and meta.get("content"):
        return SignalResult(
            "Mobile viewport", "crawlability", True, 2, 2,
            "Mobile viewport meta present.",
            None,
        )
    return SignalResult(
        "Mobile viewport", "crawlability", False, 2, 0,
        "No mobile viewport meta tag.",
        "Add `<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">` to head.",
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _check_open_graph(soup: BeautifulSoup) -> SignalResult:
    """Crawlability signal (weight 2): pass when at least three ``og:*`` metas exist."""
    og_meta = soup.find_all("meta", attrs={"property": re.compile(r"^og:")})
    found = len(og_meta)
    enough = found >= 3

    if found:
        detail = f"Found {found} og:* tags."
    else:
        detail = "No Open Graph tags."

    if enough:
        points, fix = 2, None
    else:
        points = 0
        fix = "Add og:title, og:description, og:image, og:url — AI summarizers lift them."

    return SignalResult("Open Graph tags", "crawlability", enough, 2, points, detail, fix)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _check_llms_txt(parsed_url) -> SignalResult:
    """Crawlability signal (weight 3): pass when ``/llms.txt`` exists at the site root.

    The file counts as present when the fetch returns HTTP 200, the body is
    longer than 50 characters, and the response does not look like an HTML
    page. The HTML guard matters because ``_fetch`` follows redirects: sites
    that answer unknown paths with a 200 "not found" page or bounce to the
    homepage would otherwise be credited with an llms.txt they do not have.
    """
    base = f"{parsed_url.scheme}://{parsed_url.netloc}"
    r, _ = _fetch(f"{base}/llms.txt")

    ok = False
    if r is not None and r.status_code == 200:
        body = r.text
        content_type = r.headers.get("content-type", "").lower()
        opening = body.lstrip()[:200].lower()
        looks_like_html = "html" in content_type or opening.startswith(
            ("<!doctype html", "<html")
        )
        ok = len(body) > 50 and not looks_like_html

    return SignalResult(
        "llms.txt file", "crawlability", ok, 3, 3 if ok else 0,
        "llms.txt present at site root." if ok else "No /llms.txt found.",
        None if ok else "Add a /llms.txt file at the site root following llmstxt.org spec.",
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _check_robots_ai(parsed_url) -> SignalResult:
    """Crawlability signal (weight 4): pass when robots.txt lets AI crawlers fetch ``/``.

    robots.txt is fetched from the site root. If it is missing or not served
    with HTTP 200 the check passes, since nothing restricts crawlers. Otherwise
    each name in ``AI_CRAWLERS`` is tested against the parsed rules for the
    site root; one point is lost per blocked crawler, floored at zero.

    Parsing is delegated to the standard-library robots.txt parser so that
    wildcard (``User-agent: *``) groups, comment lines, and groups written
    without blank lines between them are interpreted correctly.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.robotparser import RobotFileParser

    base = f"{parsed_url.scheme}://{parsed_url.netloc}"
    r, _ = _fetch(f"{base}/robots.txt")
    if r is None or r.status_code != 200:
        return SignalResult(
            "Robots allows AI crawlers", "crawlability", True, 4, 4,
            "No robots.txt — AI crawlers can fetch by default.", None,
        )

    parser = RobotFileParser()
    parser.parse(r.text.splitlines())
    site_root = f"{base}/"
    blocked = [bot for bot in AI_CRAWLERS if not parser.can_fetch(bot, site_root)]

    if blocked:
        return SignalResult(
            "Robots allows AI crawlers", "crawlability", False, 4, max(0, 4 - len(blocked)),
            f"Blocking these AI crawlers via robots.txt: {', '.join(blocked)}.",
            f"Remove the Disallow rules for: {', '.join(blocked)} unless intentional. AI overviews need access to cite you.",
        )
    return SignalResult(
        "Robots allows AI crawlers", "crawlability", True, 4, 4,
        "All major AI crawlers allowed in robots.txt.", None,
    )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Schema & structure
|
|
229
|
+
def _extract_jsonld(soup: BeautifulSoup) -> list[dict]:
    """Collect the JSON-LD objects embedded in the page.

    Every ``<script type="application/ld+json">`` body is parsed as JSON;
    scripts that fail to parse are skipped. A top-level object is kept as-is,
    a top-level array contributes its object members, and anything else is
    ignored.
    """
    collected = []
    for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
        raw = script.string or ""
        try:
            parsed = json.loads(raw)
        except (ValueError, TypeError):
            continue
        if isinstance(parsed, list):
            collected.extend(item for item in parsed if isinstance(item, dict))
        elif isinstance(parsed, dict):
            collected.append(parsed)
    return collected
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _schema_types(jsonld: list[dict]) -> set[str]:
|
|
244
|
+
types: set[str] = set()
|
|
245
|
+
for entry in jsonld:
|
|
246
|
+
graph = entry.get("@graph", [entry]) if "@graph" in entry else [entry]
|
|
247
|
+
for node in graph:
|
|
248
|
+
t = node.get("@type")
|
|
249
|
+
if isinstance(t, str):
|
|
250
|
+
types.add(t)
|
|
251
|
+
elif isinstance(t, list):
|
|
252
|
+
types.update(str(x) for x in t)
|
|
253
|
+
return types
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _check_article_schema(types: set[str]) -> SignalResult:
    """Schema signal (weight 5): pass when any article-type ``@type`` is declared."""
    matched = types & {"Article", "NewsArticle", "BlogPosting", "TechArticle"}
    if matched:
        return SignalResult(
            "Article schema", "schema_structure", True, 5, 5,
            f"Article-type schema present: {sorted(matched)}",
            None,
        )
    return SignalResult(
        "Article schema", "schema_structure", False, 5, 0,
        "No Article / BlogPosting / NewsArticle schema found.",
        "Add JSON-LD with @type: Article (or BlogPosting). Required for most AI Overview citations.",
    )
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _check_faq_schema(types: set[str]) -> SignalResult:
    """Schema signal (weight 6): pass when ``FAQPage`` is among the declared types."""
    if "FAQPage" in types:
        return SignalResult(
            "FAQ schema", "schema_structure", True, 6, 6,
            "FAQPage JSON-LD present.",
            None,
        )
    return SignalResult(
        "FAQ schema", "schema_structure", False, 6, 0,
        "No FAQPage schema.",
        "Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal for AI Overview citations.",
    )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _check_howto_schema(types: set[str]) -> SignalResult:
    """Schema signal (weight 3): pass when ``HowTo`` is among the declared types."""
    if "HowTo" in types:
        return SignalResult(
            "HowTo schema", "schema_structure", True, 3, 3,
            "HowTo schema present.",
            None,
        )
    return SignalResult(
        "HowTo schema", "schema_structure", False, 3, 0,
        "No HowTo schema.",
        "If your page has steps, add HowTo schema. Heavily lifted by AI summarizers.",
    )
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _check_breadcrumb_schema(types: set[str]) -> SignalResult:
    """Schema signal (weight 3): pass when ``BreadcrumbList`` is among the declared types."""
    if "BreadcrumbList" in types:
        return SignalResult(
            "Breadcrumb schema", "schema_structure", True, 3, 3,
            "BreadcrumbList schema present.",
            None,
        )
    return SignalResult(
        "Breadcrumb schema", "schema_structure", False, 3, 0,
        "No BreadcrumbList schema.",
        "Add BreadcrumbList JSON-LD — helps AI understand site hierarchy.",
    )
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _check_heading_structure(soup: BeautifulSoup) -> SignalResult:
    """Schema signal (weight 4): pass on exactly one H1 and at least three H2s.

    Headings are counted from both the native tags and elements carrying
    ``role="heading"`` with the matching ``aria-level``. When no level-1
    heading is found but the document has a ``<title>``, the title stands in
    as the single primary heading and the detail text says so. A failing page
    earns 2 points if it has at least one H1 and two H2s, otherwise 1.
    """
    h1_count = len(soup.find_all("h1")) + len(
        soup.find_all(attrs={"role": "heading", "aria-level": "1"})
    )
    h2_count = len(soup.find_all("h2")) + len(
        soup.find_all(attrs={"role": "heading", "aria-level": "2"})
    )

    # Pages that style their title with CSS instead of <h1> still have a primary heading.
    title_note = ""
    if not h1_count and soup.find("title"):
        h1_count = 1
        title_note = "(no explicit <h1>; using <title> as implicit primary heading)"

    if h1_count == 1 and h2_count >= 3:
        return SignalResult(
            "Heading structure", "schema_structure", True, 4, 4,
            f"1 H1, {h2_count} H2s. Clean hierarchy. {title_note}".strip(), None,
        )

    advice = []
    if h1_count > 1:
        advice.append(f"{h1_count} H1 tags found — should be exactly one.")
    elif h1_count == 0:
        advice.append("No H1 detected. Add exactly one <h1> with the page's primary title.")
    if h2_count < 3:
        advice.append(f"Only {h2_count} H2s. AI extractors chunk by H2 — add more sectioning.")

    if h1_count >= 1 and h2_count >= 2:
        partial = 2
    else:
        partial = 1

    return SignalResult(
        "Heading structure", "schema_structure", False, 4, partial,
        f"{h1_count} H1, {h2_count} H2s. {title_note}".strip(),
        " ".join(advice) if advice else "Improve heading hierarchy.",
    )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _check_definition_lists(soup: BeautifulSoup) -> SignalResult:
    """Schema signal (weight 3): pass when the page has at least one ``<dl>``."""
    count = len(soup.find_all("dl"))
    if count >= 1:
        return SignalResult(
            "Definition lists", "schema_structure", True, 3, 3,
            f"{count} `<dl>` elements.",
            None,
        )
    return SignalResult(
        "Definition lists", "schema_structure", False, 3, 0,
        "No definition lists.",
        "Use `<dl><dt>term</dt><dd>definition</dd></dl>` for glossary-style content — strong lift signal.",
    )
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _check_tables(soup: BeautifulSoup) -> SignalResult:
    """Schema signal (weight 3): pass when the page has at least one ``<table>``."""
    count = len(soup.find_all("table"))
    if count >= 1:
        return SignalResult(
            "Table markup", "schema_structure", True, 3, 3,
            f"{count} `<table>` element(s).",
            None,
        )
    return SignalResult(
        "Table markup", "schema_structure", False, 3, 0,
        "No tables.",
        "Comparison data deserves real `<table>` markup, not images of tables. AI lifts these.",
    )
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _check_question_h2s(soup: BeautifulSoup) -> SignalResult:
    """Schema signal (weight 3): pass when at least two ``<h2>`` texts end in ``?``.

    Points equal the number of question-style H2s, capped at 3.
    """
    q_count = 0
    for heading in soup.find_all("h2"):
        if heading.get_text(strip=True).rstrip().endswith("?"):
            q_count += 1

    passed = q_count >= 2
    if q_count:
        detail = f"{q_count} H2s end with a question mark."
    else:
        detail = "No question-style H2s."
    fix = (
        None
        if passed
        else "Convert ≥2 H2s to actual user questions. AI Overviews extract Q&A patterns disproportionately."
    )
    return SignalResult(
        "Question-style H2s", "schema_structure", passed, 3, min(3, q_count), detail, fix,
    )
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
# Content / format
|
|
361
|
+
def _word_count(soup: BeautifulSoup) -> int:
    """Count the words in the page text outside boilerplate elements.

    NOTE: this removes every script, style, noscript, header, footer and nav
    element from ``soup`` in place before extracting text, so the caller's
    tree no longer contains them afterwards.
    """
    boilerplate = ["script", "style", "noscript", "header", "footer", "nav"]
    for element in soup(boilerplate):
        element.decompose()
    visible = soup.get_text(separator=" ", strip=True)
    return sum(1 for _ in re.finditer(r"\b\w+\b", visible))
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _check_word_count(wc: int) -> SignalResult:
    """Content signal (weight 5): grade the page's word count in three tiers.

    1200–3500 words passes with 5 points; 800–1199 or 3501–5000 fails with
    2 points; anything else fails with 0 points.
    """
    label = "Word count (lift-worthy range)"
    bucket = "content_format"

    too_thin = wc < 800
    if too_thin or wc > 5000:
        return SignalResult(
            label, bucket, False, 5, 0,
            f"{wc} words — too {'thin' if too_thin else 'long'}.",
            "Aim for 1500–2500 words with one clear answer per section.",
        )
    if wc < 1200 or wc > 3500:
        return SignalResult(
            label, bucket, False, 5, 2,
            f"{wc} words — outside the sweet spot (1200–3500).",
            "Aim for 1500–2500 words. Thin pages and bloated pages both lose extraction priority.",
        )
    return SignalResult(
        label, bucket, True, 5, 5,
        f"{wc} words — in the lift-worthy range (1200–3500).", None,
    )
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _check_first_paragraph_answer(soup: BeautifulSoup) -> SignalResult:
    """Content signal (weight 5): pass when the first real paragraph is a short answer.

    The main region is the first of ``<article>``, ``<main>`` or ``<body>``.
    Paragraphs with fewer than 8 words (captions, bylines, image text) are
    skipped; the first remaining paragraph passes when it has 15–60 words and
    ends with ``.``, ``!`` or ``?``. A failing paragraph earns 2 points if it
    has at least 15 words, otherwise 1; no region or no paragraph earns 0.
    """
    article = soup.find("article") or soup.find("main") or soup.body
    if not article:
        return SignalResult(
            "Answer in first 60 words", "content_format", False, 5, 0,
            "Could not detect a main content region.",
            "Wrap the article body in `<article>` or `<main>` for clean extraction.",
        )

    first_text = ""
    first_word_n = 0
    for p in article.find_all("p"):
        # separator=" " keeps words apart across inline tags; with strip=True
        # alone, "a <b>bold</b> word" collapses to "aboldword" and is undercounted.
        text = p.get_text(separator=" ", strip=True)
        words = re.findall(r"\b\w+\b", text)
        if len(words) < 8:
            continue
        first_text = text
        first_word_n = len(words)
        break

    if first_word_n == 0:
        return SignalResult(
            "Answer in first 60 words", "content_format", False, 5, 0,
            "No non-trivial first paragraph found in main content.",
            "Open with a real <p> of 15–60 words that directly answers the page's question.",
        )

    in_range = 15 <= first_word_n <= 60
    ends_cleanly = first_text.endswith((".", "!", "?"))
    ok = in_range and ends_cleanly

    if ok:
        detail = f"First real paragraph: {first_word_n} words."
        fix = None
    elif in_range:
        # Length is fine; only the terminal punctuation is missing, so say that
        # instead of reporting a length problem that does not exist.
        detail = (
            f"First real paragraph is {first_word_n} words but does not end "
            "with sentence punctuation (. ! ?)."
        )
        fix = "End the first paragraph with a complete sentence that directly answers the page's core question."
    else:
        detail = f"First real paragraph is {first_word_n} words — outside the AI-lift sweet spot of 15–60."
        fix = "Cut the intro. The first paragraph should be a 15–60 word direct answer to the page's core question."

    return SignalResult(
        "Answer in first 60 words", "content_format", ok, 5,
        5 if ok else (2 if first_word_n >= 15 else 1),
        detail,
        fix,
    )
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _check_tldr(soup: BeautifulSoup) -> SignalResult:
    """Content signal (weight 5): pass when a summary cue appears near the top.

    Looks for "tl;dr", "tldr", "in short" or "summary" as whole words within
    the first 2000 characters of the lower-cased page text.
    """
    opening = soup.get_text(separator=" ", strip=True).lower()[:2000]
    found = re.search(r"\btl;dr\b|\btldr\b|\bin short\b|\bsummary\b", opening) is not None
    if found:
        return SignalResult(
            "TL;DR / summary block near top", "content_format", True, 5, 5,
            "TL;DR or summary detected near the top of the page.",
            None,
        )
    return SignalResult(
        "TL;DR / summary block near top", "content_format", False, 5, 0,
        "No TL;DR or summary block found.",
        "Add a 50-word TL;DR after the H1. AI summarizers lift TL;DR blocks at much higher rates.",
    )
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _check_bold_answer(soup: BeautifulSoup) -> SignalResult:
    """Content signal (weight 5): pass when the first ``<p>`` contains bold text.

    The main region is the first of ``<article>``, ``<main>`` or ``<body>``;
    only its very first ``<p>`` is inspected for a ``<strong>`` or ``<b>``.
    """
    label = "Bold answer in first section"
    bucket = "content_format"

    region = soup.find("article") or soup.find("main") or soup.body
    if not region:
        return SignalResult(label, bucket, False, 5, 0,
                            "No main content region detected.", None)

    opening_p = region.find("p")
    if not opening_p:
        return SignalResult(label, bucket, False, 5, 0,
                            "No paragraph found.", None)

    if bool(opening_p.find(["strong", "b"])):
        return SignalResult(
            label, bucket, True, 5, 5,
            "First paragraph contains a `<strong>` or `<b>` tag.",
            None,
        )
    return SignalResult(
        label, bucket, False, 5, 0,
        "First paragraph has no bold emphasis.",
        "Bold the literal answer in the first paragraph. Visual emphasis = extraction signal.",
    )
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _check_lists(soup: BeautifulSoup) -> SignalResult:
    """Content signal (weight 5): pass when the page has three or more ``<ol>``/``<ul>``.

    Points equal the number of list elements, capped at 5.
    """
    count = len(soup.find_all(["ol", "ul"]))
    passed = count >= 3
    if count:
        detail = f"{count} list elements."
    else:
        detail = "No ordered/unordered lists."
    fix = (
        None
        if passed
        else "Use real list markup (3+ ol/ul) — AI Overviews favor structured enumeration."
    )
    return SignalResult("Structured lists", "content_format", passed, 5, min(5, count), detail, fix)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
# Authority
|
|
465
|
+
def _check_external_authority_links(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Authority signal (weight 5): pass when two or more links cite authority domains.

    Only absolute ``http``/``https`` links to a host other than ``page_host``
    are considered. A link counts when its hostname ends in ``.gov``, ``.edu``
    or ``.ac.uk``, or is ``wikipedia.org`` or one of its subdomains. Points
    equal the number of such links, capped at 5.

    Matching uses the parsed hostname rather than the raw netloc so that a
    port or userinfo in the URL does not defeat the suffix test, and Wikipedia
    is matched on a domain boundary so look-alike hosts such as
    ``notwikipedia.org`` are not credited.
    """
    auth_suffixes = (".gov", ".edu", ".ac.uk", ".wikipedia.org")
    external_auth = 0
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if not href.startswith("http"):
            continue
        try:
            parsed = urlparse(href)
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): ignore the link.
            continue
        if parsed.netloc.lower() == page_host:
            continue
        host = (parsed.hostname or "").lower()
        if host == "wikipedia.org" or host.endswith(auth_suffixes):
            external_auth += 1

    ok = external_auth >= 2
    return SignalResult(
        "Authority outbound links", "authority", ok, 5, min(5, external_auth),
        f"{external_auth} link(s) to .gov / .edu / Wikipedia." if external_auth
        else "No outbound links to authority domains.",
        None if ok else "Cite 2+ authority sources (.gov, .edu, Wikipedia). Provenance is an extraction signal.",
    )
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _check_internal_links(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Authority signal (weight 4): pass when the page has 5–50 internal links.

    A link is internal when its href is root-relative (starts with a single
    ``/``) or is an ``http``-prefixed URL whose netloc equals ``page_host``.
    Outside the range, any internal link at all still earns 2 points.
    """
    internal = 0
    for anchor in soup.find_all("a", href=True):
        target = anchor["href"]
        if target.startswith("/") and not target.startswith("//"):
            internal += 1
            continue
        if not target.startswith("http"):
            continue
        try:
            same_host = urlparse(target).netloc.lower() == page_host
        except ValueError:
            continue
        if same_host:
            internal += 1

    healthy = 5 <= internal <= 50
    if healthy:
        points = 4
    elif internal:
        points = 2
    else:
        points = 0

    return SignalResult(
        "Internal linking", "authority", healthy, 4, points,
        f"{internal} internal links." if internal else "No internal links detected.",
        None if healthy else "5–50 internal links is the healthy range. Below = orphan; above = link soup.",
    )
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _check_author_byline(soup: BeautifulSoup, jsonld: list[dict]) -> SignalResult:
    """Authority signal (weight 5): pass when an author is named.

    An author is recognised from a truthy ``author`` field on any JSON-LD
    node (``@graph`` members when present, otherwise the entry itself). If
    none is found, the first 3000 characters of the lower-cased page text are
    searched for a "by <word> <word>", "written by" or "author:" byline.
    """
    named = any(
        isinstance(node, dict) and node.get("author")
        for entry in jsonld
        for node in entry.get("@graph", [entry])
    )

    if not named:
        # Fall back to a visible byline near the top of the page.
        opening = soup.get_text(separator=" ", strip=True).lower()[:3000]
        named = bool(re.search(r"\bby [a-z]+\s+[a-z]+\b|written by\b|author:", opening))

    if named:
        return SignalResult(
            "Named author / byline", "authority", True, 5, 5,
            "Author named (schema or visible byline).",
            None,
        )
    return SignalResult(
        "Named author / byline", "authority", False, 5, 0,
        "No author byline detected.",
        "Name a real author with a profile page. E-E-A-T's first E = experience, and that means a person.",
    )
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _check_external_link_density(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Authority signal (weight 3): pass when the page has 3–30 outbound links.

    A link is outbound when its href starts with ``http`` and its netloc
    differs from ``page_host``. A failing page still earns 1 point.
    """
    external = 0
    for anchor in soup.find_all("a", href=True):
        target = anchor["href"]
        if not target.startswith("http"):
            continue
        try:
            host = urlparse(target).netloc.lower()
        except ValueError:
            continue
        if host != page_host:
            external += 1

    in_range = 3 <= external <= 30
    if external:
        detail = f"{external} outbound links."
    else:
        detail = "No outbound links."

    return SignalResult(
        "Outbound link density", "authority", in_range, 3, 3 if in_range else 1,
        detail,
        None if in_range else "Cite sources liberally (3–30 outbound). AI prioritizes content with clear provenance.",
    )
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def _check_reviews_or_quotes(soup: BeautifulSoup) -> SignalResult:
    """Authority signal (weight 3): pass when any ``<blockquote>`` or ``<cite>`` exists."""
    quote_n = len(soup.find_all("blockquote"))
    cite_n = len(soup.find_all("cite"))
    if quote_n + cite_n >= 1:
        return SignalResult(
            "Quotes & citations markup", "authority", True, 3, 3,
            f"{quote_n} `<blockquote>`, {cite_n} `<cite>`.",
            None,
        )
    return SignalResult(
        "Quotes & citations markup", "authority", False, 3, 0,
        "No `<blockquote>` or `<cite>` tags.",
        "Wrap quotes in `<blockquote>`. AI summarizers credit quoted sources back to the original.",
    )
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
# Freshness
|
|
565
|
+
def _check_last_modified(response, jsonld: list[dict]) -> SignalResult:
    """Freshness signal (weight 4): pass when a modification date is exposed.

    Two sources are consulted: the response's ``last-modified`` header and the
    first truthy ``dateModified`` on any JSON-LD node (``@graph`` members when
    present, otherwise the entry itself). The JSON-LD date is preferred in the
    detail text when both exist.
    """
    header_date = response.headers.get("last-modified") if response else None

    schema_date = next(
        (
            node["dateModified"]
            for entry in jsonld
            for node in entry.get("@graph", [entry])
            if isinstance(node, dict) and node.get("dateModified")
        ),
        None,
    )

    if schema_date:
        detail = f"dateModified: {schema_date}"
    elif header_date:
        detail = f"Last-Modified: {header_date}"
    else:
        detail = "No modification date detected."

    found = bool(header_date or schema_date)
    return SignalResult(
        "Last modified date", "freshness", found, 4, 4 if found else 0,
        detail,
        None if found else "Expose a dateModified in JSON-LD or via Last-Modified header. AI prioritizes fresh content.",
    )
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _check_dated_claims(soup: BeautifulSoup) -> SignalResult:
    """Freshness signal (weight 3): pass when the text contains a dated phrase.

    A dated phrase is "in", "as of", "updated" or "since" followed by a year
    from 2023 to 2026 or by a month name, matched case-insensitively.
    """
    body_text = soup.get_text(separator=" ", strip=True)
    dated = re.search(
        r"\b(in|as of|updated|since)\s+(202[3-6]|january|february|march|april|may|june|july|august|september|october|november|december)\b",
        body_text,
        re.I,
    ) is not None

    if dated:
        return SignalResult(
            "Dated claims in body", "freshness", True, 3, 3,
            "Body contains explicit dated phrases.",
            None,
        )
    return SignalResult(
        "Dated claims in body", "freshness", False, 3, 0,
        "No dated phrases in body text.",
        "Use 'As of {month} 2026' on every claim. Undated content reads stale to AI models.",
    )
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def _check_year_in_title(soup: BeautifulSoup) -> SignalResult:
    """Freshness signal (weight 3): pass when the H1 (or ``<title>``) contains a year.

    The first ``<h1>`` is used when present, otherwise ``<title>``. A year
    counts when it is 2024, 2025 or 2026 standing as its own word.
    NOTE(review): the accepted year range is hard-coded and will need
    updating as time passes.
    """
    title_tag = soup.find("h1") or soup.find("title")
    # separator=" " keeps a year wrapped in an inline tag from being glued to
    # the preceding word ("in <span>2026</span>" -> "in 2026", not "in2026"),
    # which would defeat the \b word-boundary match below.
    title = title_tag.get_text(separator=" ", strip=True) if title_tag else ""
    has_year = bool(re.search(r"\b(202[4-6])\b", title))
    return SignalResult(
        "Year in title", "freshness", has_year, 3, 3 if has_year else 0,
        "Title contains a year." if has_year else "No year in H1/title.",
        None if has_year else "If the content is time-sensitive, include the year in the title (e.g. '... in 2026').",
    )
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
# ============ ORCHESTRATOR ============
|
|
613
|
+
|
|
614
|
+
def score_url(url: str) -> ScoreResult:
    """Fetch ``url``, run every signal check, and aggregate a 0–100 score.

    The URL is normalised and fetched. On fetch failure the result carries the
    error in ``notes``, a grade of "—" and a score of 0. Otherwise the HTML is
    parsed, each signal check runs once, and points are summed per bucket and
    rescaled to that bucket's cap in ``WEIGHTS``. The rounded, clamped total
    becomes ``score``; ``fixes`` holds the first ten non-empty fix messages in
    signal order.
    """
    url = _norm_url(url)
    result = ScoreResult(
        url=url,
        score=0,
        grade="F",
        fetched_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        page_title=None,
    )

    response, err = _fetch(url)
    if response is None:
        result.notes.append(err or "Fetch failed for unknown reason.")
        result.grade = "—"
        return result

    soup = BeautifulSoup(response.text, "lxml")
    # Use the final URL after redirects so scheme/host checks reflect what was served.
    parsed_url = urlparse(str(response.url))
    page_host = parsed_url.netloc.lower()

    title_tag = soup.find("title")
    result.page_title = title_tag.get_text(strip=True) if title_tag else None

    jsonld = _extract_jsonld(soup)
    schema_set = _schema_types(jsonld)

    # Count <script> tags BEFORE _word_count(): that helper strips script (and
    # style/noscript/header/footer/nav) elements from the soup in place, so
    # counting afterwards always yields 0 and the warning below never fires.
    script_count = len(soup.find_all("script"))
    wc = _word_count(soup)

    # Detect likely JS-rendered SPA: lots of script tags + very few semantic content tags.
    # If we see ≥3 scripts but fewer than 3 <p> tags AND fewer than 200 words of body text,
    # this page is almost certainly client-side rendered.
    semantic_p = len(soup.find_all("p"))
    if script_count >= 3 and semantic_p < 3 and wc < 200:
        result.notes.append(
            "⚠ This page appears to be JavaScript-rendered. AI crawlers see what we see — not what you see in your browser. "
            "The score reflects what's actually in the raw HTML response."
        )

    signals: list[SignalResult] = [
        # Crawlability bucket (15 pts max)
        _check_https(parsed_url),
        _check_canonical(soup, url),
        _check_viewport(soup),
        _check_open_graph(soup),
        _check_llms_txt(parsed_url),
        _check_robots_ai(parsed_url),

        # Schema & structure (30 pts max)
        _check_article_schema(schema_set),
        _check_faq_schema(schema_set),
        _check_howto_schema(schema_set),
        _check_breadcrumb_schema(schema_set),
        _check_heading_structure(soup),
        _check_definition_lists(soup),
        _check_tables(soup),
        _check_question_h2s(soup),

        # Content / format (25 pts max)
        _check_word_count(wc),
        _check_first_paragraph_answer(soup),
        _check_tldr(soup),
        _check_bold_answer(soup),
        _check_lists(soup),

        # Authority (20 pts max)
        _check_external_authority_links(soup, page_host),
        _check_internal_links(soup, page_host),
        _check_author_byline(soup, jsonld),
        _check_external_link_density(soup, page_host),
        _check_reviews_or_quotes(soup),

        # Freshness (10 pts max)
        _check_last_modified(response, jsonld),
        _check_dated_claims(soup),
        _check_year_in_title(soup),
    ]

    # Aggregate per bucket
    bucket_raw: dict[str, list[SignalResult]] = {b: [] for b in WEIGHTS}
    for s in signals:
        bucket_raw[s.bucket].append(s)

    bucket_summary: dict[str, dict[str, float]] = {}
    total_points = 0.0
    for bucket, weight_max in WEIGHTS.items():
        bs = bucket_raw[bucket]
        # "or 1" avoids dividing by zero for a bucket with no signals.
        bucket_weight = sum(s.weight for s in bs) or 1
        bucket_earned = sum(s.points for s in bs)
        # Normalize to the bucket weight cap
        scaled = (bucket_earned / bucket_weight) * weight_max
        bucket_summary[bucket] = {
            "earned": round(scaled, 1),
            "max": float(weight_max),
            "percent": round(100 * scaled / weight_max, 1) if weight_max else 0,
        }
        total_points += scaled

    score_int = max(0, min(100, round(total_points)))
    result.score = score_int
    result.grade = _grade(score_int)
    result.bucket_scores = bucket_summary
    result.signals = signals
    result.fixes = [s.fix for s in signals if s.fix][:10]  # first 10 actionable fixes
    return result
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
# Convenience for serverless / CLI
|
|
721
|
+
def score_url_json(url: str) -> str:
    """Score ``url`` and return the result's ``to_dict()`` as 2-space-indented JSON."""
    payload = score_url(url).to_dict()
    return json.dumps(payload, indent=2)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "oryon-score"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Score any URL for AI search readiness. Free open-source tool by Oryon."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [{ name = "Amaury", email = "amaury@seoryon.com" }]
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
keywords = ["seo", "ai-search", "ai-overview", "geo", "aeo", "llm-citation", "schema", "audit"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"httpx>=0.27",
|
|
26
|
+
"beautifulsoup4>=4.12",
|
|
27
|
+
"lxml>=5.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://seoryon.com"
|
|
32
|
+
Repository = "https://github.com/SEOryon/oryon-score"
|
|
33
|
+
Issues = "https://github.com/SEOryon/oryon-score/issues"
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
oryon-score = "oryon_score.cli:main"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["oryon_score"]
|
|
40
|
+
|
|
41
|
+
[tool.hatch.build.targets.sdist]
|
|
42
|
+
include = [
|
|
43
|
+
"/oryon_score",
|
|
44
|
+
"/README.md",
|
|
45
|
+
"/LICENSE",
|
|
46
|
+
"/pyproject.toml",
|
|
47
|
+
]
|