oryon-score 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .venv/
5
+ venv/
6
+ dist/
7
+ build/
8
+ .env
9
+ *.log
10
+ .vercel
11
+ .DS_Store
12
+ node_modules/
13
+ report.json
14
+ .pytest_cache/
15
+ .coverage
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Amaury / SEOryon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: oryon-score
3
+ Version: 0.1.0
4
+ Summary: Score any URL for AI search readiness. Free open-source tool by Oryon.
5
+ Project-URL: Homepage, https://seoryon.com
6
+ Project-URL: Repository, https://github.com/SEOryon/oryon-score
7
+ Project-URL: Issues, https://github.com/SEOryon/oryon-score/issues
8
+ Author-email: Amaury <amaury@seoryon.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: aeo,ai-overview,ai-search,audit,geo,llm-citation,schema,seo
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Internet :: WWW/HTTP
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: beautifulsoup4>=4.12
22
+ Requires-Dist: httpx>=0.27
23
+ Requires-Dist: lxml>=5.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Oryon AI Search Readiness Score
27
+
28
+ > Score any URL for AI search readiness. Free, open-source, no signup.
29
+ > Try it live → **[score.seoryon.com](https://score.seoryon.com)**
30
+
31
+ [![License: MIT](https://img.shields.io/badge/License-MIT-9990FF.svg)](LICENSE)
32
+ [![PyPI](https://img.shields.io/badge/pip-oryon--score-9990FF)](https://pypi.org/project/oryon-score/)
33
+ [![GitHub stars](https://img.shields.io/github/stars/SEOryon/oryon-score?style=social)](https://github.com/SEOryon/oryon-score)
34
+
35
+ ---
36
+
37
+ ## What this is
38
+
39
+ A free tool that scores any URL on **27 signals** AI search engines use to decide what to cite, and returns the top fixes — ranked by impact.
40
+
41
+ Built and maintained by **[Oryon](https://seoryon.com)** — the SEO engine that writes and publishes articles built to rank AND get cited. This is the demo. Your whole site is the product.
42
+
43
+ ```
44
+ $ oryon-score https://example.com/blog/ai-overview-guide
45
+
46
+ Oryon AI Search Readiness Score
47
+ https://example.com/blog/ai-overview-guide
48
+ The AI Overview Guide: How to Get Cited by Google AI
49
+
50
+ 62/100 · Grade C
51
+
52
+ By bucket
53
+ Schema Structure ████████████████░░░░░░░░ 18.6/30
54
+ Content Format █████████████████░░░░░░░ 17.0/25
55
+ Authority ████████████░░░░░░░░░░░░ 12.0/20
56
+ Crawlability ███████████████████░░░░░ 11.4/15
57
+ Freshness ███░░░░░░░░░░░░░░░░░░░░░ 3.0/10
58
+
59
+ Top fixes (in order of impact)
60
+ ✗ FAQ schema (No FAQPage schema.)
61
+ → Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal.
62
+ ✗ TL;DR / summary block near top (No TL;DR or summary block found.)
63
+ → Add a 50-word TL;DR after the H1. AI summarizers lift these at much higher rates.
64
+ ✗ Last modified date (No modification date detected.)
65
+ → Expose a dateModified in JSON-LD or via Last-Modified header.
66
+ ...
67
+
68
+ Want continuous scoring across every page on your site?
69
+ → Try Oryon free for 3 days: seoryon.com
70
+ ```
71
+
72
+ ---
73
+
74
+ ## Why this exists
75
+
76
+ The market for AI citation tracking is dominated by dashboards starting at **€295/mo**. Most SEO teams just need to answer one question: *"is my page set up to get cited at all?"* That answer should be free.
77
+
78
+ This tool gives you that answer in 10 seconds. No signup. No tokens. Just paste a URL.
79
+
80
+ If you want **continuous scoring across every URL on your site**, plus AI citation tracking across ChatGPT / Perplexity / Gemini / Google AI, plus a writer that ships extractable articles automatically — that's [Oryon](https://seoryon.com). From €39/mo.
81
+
82
+ ---
83
+
84
+ ## What it actually checks
85
+
86
+ 27 signals across 5 buckets that AI Overviews + LLM citation systems actually weight:
87
+
88
+ ### Schema & structure (30 pts)
89
+ - Article / BlogPosting JSON-LD
90
+ - **FAQPage schema** (highest-correlation signal)
91
+ - HowTo schema
92
+ - BreadcrumbList schema
93
+ - Heading hierarchy (1 H1, ≥3 H2s)
94
+ - Definition lists (`<dl>`)
95
+ - Table markup
96
+ - Question-style H2s
97
+
98
+ ### Content / format (25 pts)
99
+ - Word count in the 1200–3500 sweet spot
100
+ - Direct answer in the first 60 words
101
+ - TL;DR / summary block near the top
102
+ - Bold emphasis in the first section
103
+ - 3+ structured lists
104
+
105
+ ### Authority (20 pts)
106
+ - Outbound links to .gov / .edu / Wikipedia
107
+ - 5–50 internal links (healthy range)
108
+ - Named author / byline (E-E-A-T)
109
+ - Outbound link density (3–30)
110
+ - `<blockquote>` / `<cite>` markup
111
+
112
+ ### Crawlability (15 pts)
113
+ - HTTPS
114
+ - Canonical URL
115
+ - Mobile viewport
116
+ - Open Graph (≥3 og:* tags)
117
+ - `llms.txt` at site root
118
+ - `robots.txt` allows GPTBot, ClaudeBot, PerplexityBot, CCBot, Google-Extended, etc.
119
+
120
+ ### Freshness (10 pts)
121
+ - `dateModified` or `Last-Modified` header
122
+ - Dated phrases in body ("as of May 2026")
123
+ - Year in title
124
+
125
+ Each signal has a **specific fix** — what to change, where to change it, why it matters. No fluff.
126
+
127
+ ---
128
+
129
+ ## Install
130
+
131
+ ### Web (no install)
132
+ Just open **[score.seoryon.com](https://score.seoryon.com)** and paste a URL.
133
+
134
+ ### CLI (pip)
135
+ ```bash
136
+ pip install oryon-score
137
+
138
+ oryon-score https://example.com/blog/your-best-article
139
+ oryon-score https://example.com --json
140
+ oryon-score https://example.com --out report.json
141
+ ```
142
+
143
+ Requires Python 3.10+.
144
+
145
+ ### From source
146
+ ```bash
147
+ git clone https://github.com/SEOryon/oryon-score
148
+ cd oryon-score
149
+ pip install -e .
150
+
151
+ oryon-score https://example.com
152
+ ```
153
+
154
+ ### Use in Python
155
+ ```python
156
+ from oryon_score import score_url
157
+
158
+ result = score_url("https://example.com/blog/post")
159
+ print(result.score, result.grade)
160
+ for fix in result.fixes:
161
+ print(" -", fix)
162
+ ```
163
+
164
+ ---
165
+
166
+ ## Deploy your own (Vercel)
167
+
168
+ The `web/` + `api/` folders deploy as a Vercel project. Two files matter:
169
+
170
+ - `web/index.html` — the public scoring page
171
+ - `api/score.py` — the Python serverless endpoint
172
+
173
+ ```bash
174
+ npm i -g vercel
175
+ vercel
176
+ ```
177
+
178
+ Done. Your fork is now live at `your-project.vercel.app`. Add a custom domain in Vercel's dashboard.
179
+
180
+ ---
181
+
182
+ ## Sample output
183
+
184
+ See [`examples/example_output.json`](examples/example_output.json) for the full JSON shape returned by the API and the `--json` flag.
185
+
186
+ The web UI renders the same data with bucket bars, top fixes, and a "what's working" passlist.
187
+
188
+ ---
189
+
190
+ ## Why some sites score lower than they "should"
191
+
192
+ A score is not a verdict. It's a snapshot of *extractable* signals on the page itself. Three things this tool **does not** measure:
193
+
194
+ 1. **Domain authority / backlink graph.** Out of scope. AI citation correlates with authority, but measuring it requires a third-party API and we kept this free.
195
+ 2. **Whether the page is actually cited today.** Use the [Citation Intelligence MCP](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab for live LLM citation data.
196
+ 3. **Brand authority signals.** Wikipedia mentions, press coverage, Reddit references — they matter, and the tool flags some, but it can't grade them holistically.
197
+
198
+ For all three layers, see [Oryon](https://seoryon.com) — that's what the paid product does.
199
+
200
+ ---
201
+
202
+ ## How it stays free
203
+
204
+ - Runs on Vercel's free Python runtime
205
+ - No third-party APIs, no API keys to maintain
206
+ - No tracking, no telemetry, no user accounts
207
+ - Self-hostable in one click
208
+
209
+ Open source under MIT. Fork it, run it on your own infra, change it, ship it.
210
+
211
+ ---
212
+
213
+ ## Inspired by
214
+
215
+ This tool's structured-signal approach was inspired by [`citation-intelligence`](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab — a self-hosted MCP server for measuring LLM citation visibility. Go check it out if you want programmatic citation data from inside Claude Code or Cursor. This tool solves a different layer (page readiness, not live citation queries) and is original work.
216
+
217
+ ---
218
+
219
+ ## Contributing
220
+
221
+ PRs welcome. Especially:
222
+ - New signals (anything with a published correlation study)
223
+ - Translations of the web UI
224
+ - WordPress / Webflow / Shopify integrations
225
+ - A GitHub Action that scores changed URLs on every PR
226
+
227
+ See [CONTRIBUTING.md](CONTRIBUTING.md).
228
+
229
+ ---
230
+
231
+ ## License
232
+
233
+ MIT — see [LICENSE](LICENSE).
234
+
235
+ Built by **[Oryon](https://seoryon.com)** · Your Organic Growth Engine.
236
+ Follow [@SEOryon](https://instagram.com/SEOryon) for SEO content that doesn't lie.
@@ -0,0 +1,211 @@
1
+ # Oryon AI Search Readiness Score
2
+
3
+ > Score any URL for AI search readiness. Free, open-source, no signup.
4
+ > Try it live → **[score.seoryon.com](https://score.seoryon.com)**
5
+
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-9990FF.svg)](LICENSE)
7
+ [![PyPI](https://img.shields.io/badge/pip-oryon--score-9990FF)](https://pypi.org/project/oryon-score/)
8
+ [![GitHub stars](https://img.shields.io/github/stars/SEOryon/oryon-score?style=social)](https://github.com/SEOryon/oryon-score)
9
+
10
+ ---
11
+
12
+ ## What this is
13
+
14
+ A free tool that scores any URL on **27 signals** AI search engines use to decide what to cite, and returns the top fixes — ranked by impact.
15
+
16
+ Built and maintained by **[Oryon](https://seoryon.com)** — the SEO engine that writes and publishes articles built to rank AND get cited. This is the demo. Your whole site is the product.
17
+
18
+ ```
19
+ $ oryon-score https://example.com/blog/ai-overview-guide
20
+
21
+ Oryon AI Search Readiness Score
22
+ https://example.com/blog/ai-overview-guide
23
+ The AI Overview Guide: How to Get Cited by Google AI
24
+
25
+ 62/100 · Grade C
26
+
27
+ By bucket
28
+ Schema Structure ████████████████░░░░░░░░ 18.6/30
29
+ Content Format █████████████████░░░░░░░ 17.0/25
30
+ Authority ████████████░░░░░░░░░░░░ 12.0/20
31
+ Crawlability ███████████████████░░░░░ 11.4/15
32
+ Freshness ███░░░░░░░░░░░░░░░░░░░░░ 3.0/10
33
+
34
+ Top fixes (in order of impact)
35
+ ✗ FAQ schema (No FAQPage schema.)
36
+ → Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal.
37
+ ✗ TL;DR / summary block near top (No TL;DR or summary block found.)
38
+ → Add a 50-word TL;DR after the H1. AI summarizers lift these at much higher rates.
39
+ ✗ Last modified date (No modification date detected.)
40
+ → Expose a dateModified in JSON-LD or via Last-Modified header.
41
+ ...
42
+
43
+ Want continuous scoring across every page on your site?
44
+ → Try Oryon free for 3 days: seoryon.com
45
+ ```
46
+
47
+ ---
48
+
49
+ ## Why this exists
50
+
51
+ The market for AI citation tracking is dominated by dashboards starting at **€295/mo**. Most SEO teams just need to answer one question: *"is my page set up to get cited at all?"* That answer should be free.
52
+
53
+ This tool gives you that answer in 10 seconds. No signup. No tokens. Just paste a URL.
54
+
55
+ If you want **continuous scoring across every URL on your site**, plus AI citation tracking across ChatGPT / Perplexity / Gemini / Google AI, plus a writer that ships extractable articles automatically — that's [Oryon](https://seoryon.com). From €39/mo.
56
+
57
+ ---
58
+
59
+ ## What it actually checks
60
+
61
+ 27 signals across 5 buckets that AI Overviews + LLM citation systems actually weight:
62
+
63
+ ### Schema & structure (30 pts)
64
+ - Article / BlogPosting JSON-LD
65
+ - **FAQPage schema** (highest-correlation signal)
66
+ - HowTo schema
67
+ - BreadcrumbList schema
68
+ - Heading hierarchy (1 H1, ≥3 H2s)
69
+ - Definition lists (`<dl>`)
70
+ - Table markup
71
+ - Question-style H2s
72
+
73
+ ### Content / format (25 pts)
74
+ - Word count in the 1200–3500 sweet spot
75
+ - Direct answer in the first 60 words
76
+ - TL;DR / summary block near the top
77
+ - Bold emphasis in the first section
78
+ - 3+ structured lists
79
+
80
+ ### Authority (20 pts)
81
+ - Outbound links to .gov / .edu / Wikipedia
82
+ - 5–50 internal links (healthy range)
83
+ - Named author / byline (E-E-A-T)
84
+ - Outbound link density (3–30)
85
+ - `<blockquote>` / `<cite>` markup
86
+
87
+ ### Crawlability (15 pts)
88
+ - HTTPS
89
+ - Canonical URL
90
+ - Mobile viewport
91
+ - Open Graph (≥3 og:* tags)
92
+ - `llms.txt` at site root
93
+ - `robots.txt` allows GPTBot, ClaudeBot, PerplexityBot, CCBot, Google-Extended, etc.
94
+
95
+ ### Freshness (10 pts)
96
+ - `dateModified` or `Last-Modified` header
97
+ - Dated phrases in body ("as of May 2026")
98
+ - Year in title
99
+
100
+ Each signal has a **specific fix** — what to change, where to change it, why it matters. No fluff.
101
+
102
+ ---
103
+
104
+ ## Install
105
+
106
+ ### Web (no install)
107
+ Just open **[score.seoryon.com](https://score.seoryon.com)** and paste a URL.
108
+
109
+ ### CLI (pip)
110
+ ```bash
111
+ pip install oryon-score
112
+
113
+ oryon-score https://example.com/blog/your-best-article
114
+ oryon-score https://example.com --json
115
+ oryon-score https://example.com --out report.json
116
+ ```
117
+
118
+ Requires Python 3.10+.
119
+
120
+ ### From source
121
+ ```bash
122
+ git clone https://github.com/SEOryon/oryon-score
123
+ cd oryon-score
124
+ pip install -e .
125
+
126
+ oryon-score https://example.com
127
+ ```
128
+
129
+ ### Use in Python
130
+ ```python
131
+ from oryon_score import score_url
132
+
133
+ result = score_url("https://example.com/blog/post")
134
+ print(result.score, result.grade)
135
+ for fix in result.fixes:
136
+ print(" -", fix)
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Deploy your own (Vercel)
142
+
143
+ The `web/` + `api/` folders deploy as a Vercel project. Two files matter:
144
+
145
+ - `web/index.html` — the public scoring page
146
+ - `api/score.py` — the Python serverless endpoint
147
+
148
+ ```bash
149
+ npm i -g vercel
150
+ vercel
151
+ ```
152
+
153
+ Done. Your fork is now live at `your-project.vercel.app`. Add a custom domain in Vercel's dashboard.
154
+
155
+ ---
156
+
157
+ ## Sample output
158
+
159
+ See [`examples/example_output.json`](examples/example_output.json) for the full JSON shape returned by the API and the `--json` flag.
160
+
161
+ The web UI renders the same data with bucket bars, top fixes, and a "what's working" passlist.
162
+
163
+ ---
164
+
165
+ ## Why some sites score lower than they "should"
166
+
167
+ A score is not a verdict. It's a snapshot of *extractable* signals on the page itself. Three things this tool **does not** measure:
168
+
169
+ 1. **Domain authority / backlink graph.** Out of scope. AI citation correlates with authority, but measuring it requires a third-party API and we kept this free.
170
+ 2. **Whether the page is actually cited today.** Use the [Citation Intelligence MCP](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab for live LLM citation data.
171
+ 3. **Brand authority signals.** Wikipedia mentions, press coverage, Reddit references — they matter, and the tool flags some, but it can't grade them holistically.
172
+
173
+ For all three layers, see [Oryon](https://seoryon.com) — that's what the paid product does.
174
+
175
+ ---
176
+
177
+ ## How it stays free
178
+
179
+ - Runs on Vercel's free Python runtime
180
+ - No third-party APIs, no API keys to maintain
181
+ - No tracking, no telemetry, no user accounts
182
+ - Self-hostable in one click
183
+
184
+ Open source under MIT. Fork it, run it on your own infra, change it, ship it.
185
+
186
+ ---
187
+
188
+ ## Inspired by
189
+
190
+ This tool's structured-signal approach was inspired by [`citation-intelligence`](https://github.com/AutomateLab-tech/citation-intelligence) by AutomateLab — a self-hosted MCP server for measuring LLM citation visibility. Go check it out if you want programmatic citation data from inside Claude Code or Cursor. This tool solves a different layer (page readiness, not live citation queries) and is original work.
191
+
192
+ ---
193
+
194
+ ## Contributing
195
+
196
+ PRs welcome. Especially:
197
+ - New signals (anything with a published correlation study)
198
+ - Translations of the web UI
199
+ - WordPress / Webflow / Shopify integrations
200
+ - A GitHub Action that scores changed URLs on every PR
201
+
202
+ See [CONTRIBUTING.md](CONTRIBUTING.md).
203
+
204
+ ---
205
+
206
+ ## License
207
+
208
+ MIT — see [LICENSE](LICENSE).
209
+
210
+ Built by **[Oryon](https://seoryon.com)** · Your Organic Growth Engine.
211
+ Follow [@SEOryon](https://instagram.com/SEOryon) for SEO content that doesn't lie.
@@ -0,0 +1,4 @@
1
+ from .score import score_url, score_url_json, ScoreResult, SignalResult
2
+
3
+ __version__ = "0.1.0"
4
+ __all__ = ["score_url", "score_url_json", "ScoreResult", "SignalResult"]
@@ -0,0 +1,120 @@
1
+ """
2
+ oryon-score CLI — score any URL for AI search readiness.
3
+
4
+ Usage:
5
+ oryon-score https://example.com
6
+ oryon-score https://example.com --json
7
+ oryon-score https://example.com --out report.json
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import sys
14
+
15
+ from .score import score_url
16
+
17
+ # ANSI colors — keep it minimal, fall back gracefully
18
+ def _supports_color() -> bool:
19
+ return sys.stdout.isatty() and not bool(__import__("os").environ.get("NO_COLOR"))
20
+
21
# Colour palette used by the report printer. Every key maps to an ANSI escape
# sequence, or to an empty string when colour output is not supported.
C = {
    "reset": "\033[0m",
    "bold": "\033[1m",
    "dim": "\033[2m",
    "violet": "\033[38;5;99m",
    "green": "\033[32m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "gray": "\033[90m",
}
if not _supports_color():
    # Keep the same keys so f-strings stay valid; just emit nothing.
    C = {name: "" for name in C}
31
+
32
+
33
def _bar(percent: float, width: int = 24) -> str:
    """Render a fixed-width progress bar for ``percent`` (0-100).

    Args:
        percent: Fill level; values outside 0..100 are clamped so the bar
            never grows past ``width`` characters.
        width: Total number of bar cells.

    Returns:
        The bar as a string wrapped in the palette's colour codes.
    """
    clamped = min(100.0, max(0.0, percent))
    filled = int(round(clamped / 100 * width))
    return f"{C['violet']}{'█' * filled}{C['gray']}{'·' * (width - filled)}{C['reset']}"
36
+
37
+
38
def _print_report(result):
    """Print a human-readable report for ``result`` to stdout.

    ``result`` is read for: url, page_title, grade, score, notes, signals and
    bucket_scores. A failed fetch gets a short explanation instead of a
    misleading zero score.
    """
    bold, dim, gray, reset = C["bold"], C["dim"], C["gray"], C["reset"]
    red, green, yellow, violet = C["red"], C["green"], C["yellow"], C["violet"]

    # Header: tool name, URL, truncated page title.
    print()
    print(f" {bold}Oryon AI Search Readiness Score{reset}")
    print(f" {gray}{result.url}{reset}")
    if result.page_title:
        print(f" {dim}{result.page_title[:80]}{reset}")
    print()

    # Fetch failed — show that clearly instead of a fake zero score
    fetch_failed = result.grade == "—" or (result.score == 0 and not result.signals)
    if fetch_failed:
        reason = result.notes[0] if result.notes else "Fetch failed."
        print(f" {red}{bold}✗ Could not score this URL{reset}")
        print(f" {gray}{reason}{reset}")
        print()
        print(f" {dim}Common causes: bot protection (Cloudflare), paywall, JS-only site,{reset}")
        print(f" {dim}or the page requires login. Try a different URL on the same domain.{reset}")
        print()
        return

    # Overall score, coloured by threshold.
    if result.score >= 70:
        grade_color = green
    elif result.score >= 50:
        grade_color = yellow
    else:
        grade_color = red
    print(f" {grade_color}{bold}{result.score}/100 · Grade {result.grade}{reset}")
    print()

    # Per-bucket bars.
    print(f" {bold}By bucket{reset}")
    for bucket_key, stats in result.bucket_scores.items():
        label = bucket_key.replace("_", " ").title()
        meter = _bar(stats["percent"])
        print(f" {label:<22} {meter} {stats['earned']:>4}/{int(stats['max']):<3}")
    print()

    wins = []
    misses = []
    for signal in result.signals:
        (wins if signal.passed else misses).append(signal)

    if misses:
        print(f" {bold}Top fixes{reset} {gray}(in order of impact){reset}")
        # Heaviest signals first; only the top eight are shown.
        for item in sorted(misses, key=lambda sig: -sig.weight)[:8]:
            print(f" {red}✗{reset} {bold}{item.name}{reset} {gray}({item.detail}){reset}")
            if item.fix:
                print(f" {gray}→{reset} {item.fix}")
        print()

    if wins:
        print(f" {bold}What's working{reset}")
        for item in wins[:6]:
            print(f" {green}✓{reset} {item.name} {gray}— {item.detail}{reset}")
        print()

    print(f" {dim}Want continuous scoring across every page on your site?{reset}")
    print(f" {violet}→ Try Oryon free for 3 days: seoryon.com{reset}")
    print()
88
+
89
+
90
def main() -> int:
    """CLI entry point: score one URL and print or save the result.

    Returns:
        0 when the score is at least 50, 1 when it is lower, and 2 when
        scoring raised or the ``--out`` file could not be written.
    """
    p = argparse.ArgumentParser(
        prog="oryon-score",
        description="Score any URL for AI search readiness. Inspired by Oryon.",
    )
    p.add_argument("url", help="The URL to score (with or without https://)")
    p.add_argument("--json", action="store_true", help="Output JSON only, no human format")
    p.add_argument("--out", help="Write JSON report to file")
    args = p.parse_args()

    try:
        result = score_url(args.url)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 2

    if args.out:
        try:
            # Explicit encoding so the report does not depend on the platform default.
            with open(args.out, "w", encoding="utf-8") as f:
                json.dump(result.to_dict(), f, indent=2)
        except OSError as e:
            print(f"Error: could not write {args.out}: {e}", file=sys.stderr)
            return 2
        # With --json, stdout must contain JSON only, so the status line goes to stderr.
        print(f"Saved report to {args.out}", file=sys.stderr if args.json else sys.stdout)

    if args.json:
        print(json.dumps(result.to_dict(), indent=2))
    else:
        _print_report(result)

    return 0 if result.score >= 50 else 1
117
+
118
+
119
# Allow running the module directly; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,722 @@
1
+ """
2
+ Oryon AI Search Readiness Score
3
+ Core scoring engine. Takes a URL, returns 0-100 score + per-signal results + fixes.
4
+
5
+ No LLM calls. No API keys. Pure HTML parsing + signal heuristics.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ import time
12
+ from dataclasses import dataclass, field, asdict
13
+ from typing import Any
14
+ from urllib.parse import urljoin, urlparse
15
+
16
+ import httpx
17
+ from bs4 import BeautifulSoup
18
+
19
+ # Browser-like UA to get past basic bot walls (Cloudflare, etc.).
20
+ # We still identify via the UA product suffix + an X-Tool header so we're
21
+ # not pretending to be anything we're not.
22
+ UA = (
23
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
24
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 "
25
+ "OryonAISearchScore/1.0 (+https://seoryon.com)"
26
+ )
27
+ TIMEOUT_S = 15.0
28
+
29
+ # AI crawler user agents we check robots.txt against
30
+ AI_CRAWLERS = [
31
+ "GPTBot",
32
+ "ClaudeBot",
33
+ "PerplexityBot",
34
+ "CCBot",
35
+ "Google-Extended",
36
+ "Applebot-Extended",
37
+ "Bytespider",
38
+ "anthropic-ai",
39
+ "FacebookBot",
40
+ ]
41
+
42
+ # 5 buckets that add up to 100
43
+ WEIGHTS = {
44
+ "schema_structure": 30,
45
+ "content_format": 25,
46
+ "authority": 20,
47
+ "crawlability": 15,
48
+ "freshness": 10,
49
+ }
50
+
51
+
52
+ @dataclass
53
+ class SignalResult:
54
+ name: str
55
+ bucket: str
56
+ passed: bool
57
+ weight: float # points possible if signal == binary; or weight share within bucket
58
+ points: float # actual points earned (0..weight)
59
+ detail: str
60
+ fix: str | None = None # 1-line actionable fix when failed
61
+
62
+
63
@dataclass
class ScoreResult:
    """Full scoring outcome for one URL."""

    url: str
    # Overall score, 0-100.
    score: int
    # Letter grade (A+ A B C D F); the CLI also treats "—" as "could not score".
    grade: str
    fetched_at: str
    page_title: str | None
    # Per-bucket stats; the CLI reads the "percent", "earned" and "max" keys.
    bucket_scores: dict[str, dict[str, float]] = field(default_factory=dict)
    signals: list[SignalResult] = field(default_factory=list)
    fixes: list[str] = field(default_factory=list)
    notes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy of this result, suitable for ``json.dump``.

        ``asdict`` already recurses into nested dataclasses (including the
        entries of ``signals``), so no second conversion pass is needed.
        """
        return asdict(self)
79
+
80
+
81
+ def _grade(score: int) -> str:
82
+ if score >= 90: return "A+"
83
+ if score >= 80: return "A"
84
+ if score >= 70: return "B"
85
+ if score >= 60: return "C"
86
+ if score >= 45: return "D"
87
+ return "F"
88
+
89
+
90
+ def _norm_url(url: str) -> str:
91
+ if not url.startswith(("http://", "https://")):
92
+ url = "https://" + url
93
+ return url
94
+
95
+
96
def _fetch(url: str) -> tuple[httpx.Response | None, str | None]:
    """GET ``url`` and return ``(response, None)`` or ``(None, error_message)``.

    Redirects are followed. Any status >= 400, transport error or malformed
    URL is reported as a human-readable message instead of raising.
    """
    # No explicit Accept-Encoding: httpx advertises exactly the encodings it
    # can decode. Hard-coding "br" without a brotli decoder installed would
    # leave such responses undecoded.
    headers = {
        "User-Agent": UA,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "X-Tool": "OryonAISearchScore/1.0",
    }
    try:
        with httpx.Client(
            timeout=TIMEOUT_S, follow_redirects=True, headers=headers
        ) as client:
            r = client.get(url)
            if r.status_code >= 400:
                sc = r.status_code
                if sc == 404:
                    msg = "HTTP 404 — that URL doesn't exist. Double-check the path."
                elif sc in (401, 403):
                    msg = f"HTTP {sc} — the site blocked our request (bot protection / paywall / login required)."
                elif sc == 429:
                    msg = "HTTP 429 — rate-limited. Wait a minute and try again."
                elif sc >= 500:
                    msg = f"HTTP {sc} — the site is broken right now. Try again later."
                else:
                    msg = f"HTTP {sc} {r.reason_phrase}"
                return None, msg
            return r, None
    except (httpx.HTTPError, httpx.InvalidURL) as e:
        # InvalidURL is not an HTTPError subclass; catch it so a malformed
        # URL is reported like any other fetch failure.
        return None, f"Fetch failed: {e!s}"
127
+
128
+
129
def _fetch_text(url: str) -> str:
    """Return the body text of ``url``, or an empty string if the fetch failed."""
    response, _error = _fetch(url)
    if response is not None and response.status_code < 400:
        return response.text
    return ""
134
+
135
+
136
+ # ============ SIGNAL CHECKS ============
137
+
138
def _check_https(parsed_url) -> SignalResult:
    """Crawlability signal: the URL scheme is ``https`` (2 points)."""
    if parsed_url.scheme == "https":
        return SignalResult(
            "HTTPS", "crawlability", True, 2, 2,
            "Served over HTTPS.", None,
        )
    return SignalResult(
        "HTTPS", "crawlability", False, 2, 0,
        "Page is not HTTPS.",
        "Move the site to HTTPS — AI crawlers and Google demote http URLs.",
    )
149
+
150
+
151
def _check_canonical(soup: BeautifulSoup, url: str) -> SignalResult:
    """Crawlability signal: a ``<link rel="canonical">`` with an href exists (2 points).

    ``url`` is accepted for interface compatibility and is not consulted.
    """
    link = soup.find("link", attrs={"rel": "canonical"})
    href = link.get("href") if link else None
    if href:
        return SignalResult(
            name="Canonical URL",
            bucket="crawlability",
            passed=True,
            weight=2,
            points=2,
            detail=f"Canonical: {href}",
            fix=None,
        )
    return SignalResult(
        name="Canonical URL",
        bucket="crawlability",
        passed=False,
        weight=2,
        points=0,
        detail="No canonical link tag found.",
        fix="Add a `<link rel=\"canonical\" href=\"...\">` to lock the canonical URL for this page.",
    )
163
+
164
+
165
def _check_viewport(soup: BeautifulSoup) -> SignalResult:
    """Crawlability signal: a non-empty ``<meta name="viewport">`` exists (2 points)."""
    meta = soup.find("meta", attrs={"name": "viewport"})
    if meta and meta.get("content"):
        return SignalResult(
            name="Mobile viewport",
            bucket="crawlability",
            passed=True,
            weight=2,
            points=2,
            detail="Mobile viewport meta present.",
            fix=None,
        )
    return SignalResult(
        name="Mobile viewport",
        bucket="crawlability",
        passed=False,
        weight=2,
        points=0,
        detail="No mobile viewport meta tag.",
        fix="Add `<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">` to head.",
    )
173
+
174
+
175
def _check_open_graph(soup: BeautifulSoup) -> SignalResult:
    """Crawlability signal: at least three ``og:*`` meta tags exist (2 points)."""
    found = soup.find_all("meta", attrs={"property": re.compile(r"^og:")})
    count = len(found)
    enough = count >= 3

    # The detail reports the count whenever any tag exists, even below the threshold.
    if found:
        detail = f"Found {count} og:* tags."
    else:
        detail = "No Open Graph tags."

    if enough:
        advice = None
    else:
        advice = "Add og:title, og:description, og:image, og:url — AI summarizers lift them."

    return SignalResult(
        name="Open Graph tags",
        bucket="crawlability",
        passed=enough,
        weight=2,
        points=2 if enough else 0,
        detail=detail,
        fix=advice,
    )
183
+
184
+
185
def _check_llms_txt(parsed_url) -> SignalResult:
    """Crawlability signal: a real ``/llms.txt`` exists at the site root (3 points).

    The file must be served with status 200, be longer than 50 characters,
    and must not be an HTML document. Many sites answer unknown paths with a
    200 "soft 404" HTML page, which would otherwise count as a pass.
    """
    base = f"{parsed_url.scheme}://{parsed_url.netloc}"
    r, _ = _fetch(f"{base}/llms.txt")
    body = r.text if r is not None and r.status_code == 200 else ""
    # Only the start of the body is inspected to spot an HTML page.
    looks_like_html = body.lstrip()[:200].lower().startswith(("<!doctype html", "<html"))
    ok = len(body) > 50 and not looks_like_html
    return SignalResult(
        "llms.txt file", "crawlability", ok, 3, 3 if ok else 0,
        "llms.txt present at site root." if ok else "No /llms.txt found.",
        None if ok else "Add a /llms.txt file at the site root following llmstxt.org spec.",
    )
194
+
195
+
196
def _check_robots_ai(parsed_url) -> SignalResult:
    """Crawlability signal: robots.txt lets the known AI crawlers fetch the site root (4 points).

    A missing or unfetchable robots.txt counts as a pass. Otherwise the file
    is parsed with the standard-library robots parser and each crawler in
    ``AI_CRAWLERS`` is asked whether it may fetch ``/``. This also honours
    ``User-agent: *`` groups and agent lines with trailing comments. One point
    is lost per blocked crawler, down to zero.
    """
    from urllib.robotparser import RobotFileParser

    base = f"{parsed_url.scheme}://{parsed_url.netloc}"
    r, _ = _fetch(f"{base}/robots.txt")
    if r is None or r.status_code != 200:
        return SignalResult(
            "Robots allows AI crawlers", "crawlability", True, 4, 4,
            "No robots.txt — AI crawlers can fetch by default.", None,
        )

    rules = RobotFileParser()
    rules.parse(r.text.splitlines())
    root = f"{base}/"
    blocked = [bot for bot in AI_CRAWLERS if not rules.can_fetch(bot, root)]

    if blocked:
        return SignalResult(
            "Robots allows AI crawlers", "crawlability", False, 4, max(0, 4 - len(blocked)),
            f"Blocking these AI crawlers via robots.txt: {', '.join(blocked)}.",
            f"Remove the Disallow rules for: {', '.join(blocked)} unless intentional. AI overviews need access to cite you.",
        )
    return SignalResult(
        "Robots allows AI crawlers", "crawlability", True, 4, 4,
        "All major AI crawlers allowed in robots.txt.", None,
    )
226
+
227
+
228
+ # Schema & structure
229
def _extract_jsonld(soup: BeautifulSoup) -> list[dict]:
    """Collect every JSON-LD object found in ``application/ld+json`` script tags.

    Scripts that fail to parse are skipped. A top-level list contributes only
    its dict members; any other top-level value is ignored.
    """
    collected: list[dict] = []
    scripts = soup.find_all("script", attrs={"type": "application/ld+json"})
    for script in scripts:
        raw = script.string or ""
        try:
            parsed = json.loads(raw)
        except (ValueError, TypeError):
            continue
        if isinstance(parsed, list):
            for item in parsed:
                if isinstance(item, dict):
                    collected.append(item)
        elif isinstance(parsed, dict):
            collected.append(parsed)
    return collected
241
+
242
+
243
+ def _schema_types(jsonld: list[dict]) -> set[str]:
244
+ types: set[str] = set()
245
+ for entry in jsonld:
246
+ graph = entry.get("@graph", [entry]) if "@graph" in entry else [entry]
247
+ for node in graph:
248
+ t = node.get("@type")
249
+ if isinstance(t, str):
250
+ types.add(t)
251
+ elif isinstance(t, list):
252
+ types.update(str(x) for x in t)
253
+ return types
254
+
255
+
256
def _check_article_schema(types: set[str]) -> SignalResult:
    """Schema signal: an Article-family ``@type`` is declared (5 points)."""
    article_kinds = {"Article", "NewsArticle", "BlogPosting", "TechArticle"}
    matched = types & article_kinds
    if matched:
        return SignalResult(
            name="Article schema",
            bucket="schema_structure",
            passed=True,
            weight=5,
            points=5,
            detail=f"Article-type schema present: {sorted(matched)}",
            fix=None,
        )
    return SignalResult(
        name="Article schema",
        bucket="schema_structure",
        passed=False,
        weight=5,
        points=0,
        detail="No Article / BlogPosting / NewsArticle schema found.",
        fix="Add JSON-LD with @type: Article (or BlogPosting). Required for most AI Overview citations.",
    )
264
+
265
+
266
def _check_faq_schema(types: set[str]) -> SignalResult:
    """Pass (6 of 6 points) when ``FAQPage`` is among the declared schema types."""
    if "FAQPage" in types:
        return SignalResult(
            "FAQ schema", "schema_structure", True, 6, 6,
            "FAQPage JSON-LD present.",
            None,
        )
    return SignalResult(
        "FAQ schema", "schema_structure", False, 6, 0,
        "No FAQPage schema.",
        "Wrap your FAQ section in FAQPage JSON-LD — highest-correlation signal for AI Overview citations.",
    )
273
+
274
+
275
def _check_howto_schema(types: set[str]) -> SignalResult:
    """Pass (3 of 3 points) when ``HowTo`` is among the declared schema types."""
    if "HowTo" in types:
        return SignalResult(
            "HowTo schema", "schema_structure", True, 3, 3,
            "HowTo schema present.",
            None,
        )
    return SignalResult(
        "HowTo schema", "schema_structure", False, 3, 0,
        "No HowTo schema.",
        "If your page has steps, add HowTo schema. Heavily lifted by AI summarizers.",
    )
282
+
283
+
284
def _check_breadcrumb_schema(types: set[str]) -> SignalResult:
    """Pass (3 of 3 points) when ``BreadcrumbList`` is among the declared schema types."""
    if "BreadcrumbList" in types:
        return SignalResult(
            "Breadcrumb schema", "schema_structure", True, 3, 3,
            "BreadcrumbList schema present.",
            None,
        )
    return SignalResult(
        "Breadcrumb schema", "schema_structure", False, 3, 0,
        "No BreadcrumbList schema.",
        "Add BreadcrumbList JSON-LD — helps AI understand site hierarchy.",
    )
291
+
292
+
293
def _check_heading_structure(soup: BeautifulSoup) -> SignalResult:
    """Score the page's H1/H2 hierarchy.

    Full marks require exactly one level-1 heading and at least three level-2
    headings. Both native tags and ARIA headings
    (``role="heading"`` + ``aria-level``) are counted. When no level-1 heading
    exists but the document has a ``<title>``, the title is treated as the
    implicit primary heading.
    """
    # Native tags plus ARIA headings. An element such as
    # <h1 role="heading" aria-level="1"> matches both queries, so ARIA matches
    # that are already the native tag of the same level are dropped to avoid
    # counting one heading twice.
    h1s = soup.find_all("h1")
    aria_h1s = [
        el for el in soup.find_all(attrs={"role": "heading", "aria-level": "1"})
        if el.name != "h1"
    ]
    native_h2s = soup.find_all("h2")
    aria_h2s = [
        el for el in soup.find_all(attrs={"role": "heading", "aria-level": "2"})
        if el.name != "h2"
    ]
    h1_count = len(h1s) + len(aria_h1s)
    h2_count = len(native_h2s) + len(aria_h2s)

    # Many modern pages style the title with CSS instead of <h1>. If we found a title tag,
    # treat that as the implicit primary heading.
    if h1_count == 0 and soup.find("title"):
        h1_count = 1
        detail_h1 = "(no explicit <h1>; using <title> as implicit primary heading)"
    else:
        detail_h1 = ""

    if h1_count == 1 and h2_count >= 3:
        return SignalResult(
            "Heading structure", "schema_structure", True, 4, 4,
            f"1 H1, {h2_count} H2s. Clean hierarchy. {detail_h1}".strip(), None,
        )

    fix = []
    if h1_count == 0:
        fix.append("No H1 detected. Add exactly one <h1> with the page's primary title.")
    elif h1_count > 1:
        fix.append(f"{h1_count} H1 tags found — should be exactly one.")
    if h2_count < 3:
        fix.append(f"Only {h2_count} H2s. AI extractors chunk by H2 — add more sectioning.")
    # Partial credit: 2 points when there is at least a primary heading and two sections, else 1.
    partial = 2 if (h1_count >= 1 and h2_count >= 2) else 1
    return SignalResult(
        "Heading structure", "schema_structure", False, 4, partial,
        f"{h1_count} H1, {h2_count} H2s. {detail_h1}".strip(),
        " ".join(fix) if fix else "Improve heading hierarchy.",
    )
327
+
328
+
329
def _check_definition_lists(soup: BeautifulSoup) -> SignalResult:
    """Pass (3 of 3 points) when the page contains at least one ``<dl>`` element."""
    dl_count = len(soup.find_all("dl"))
    if dl_count >= 1:
        return SignalResult(
            "Definition lists", "schema_structure", True, 3, 3,
            f"{dl_count} `<dl>` elements.",
            None,
        )
    return SignalResult(
        "Definition lists", "schema_structure", False, 3, 0,
        "No definition lists.",
        "Use `<dl><dt>term</dt><dd>definition</dd></dl>` for glossary-style content — strong lift signal.",
    )
337
+
338
+
339
def _check_tables(soup: BeautifulSoup) -> SignalResult:
    """Pass (3 of 3 points) when the page contains at least one ``<table>`` element."""
    table_count = len(soup.find_all("table"))
    if table_count >= 1:
        return SignalResult(
            "Table markup", "schema_structure", True, 3, 3,
            f"{table_count} `<table>` element(s).",
            None,
        )
    return SignalResult(
        "Table markup", "schema_structure", False, 3, 0,
        "No tables.",
        "Comparison data deserves real `<table>` markup, not images of tables. AI lifts these.",
    )
347
+
348
+
349
def _check_question_h2s(soup: BeautifulSoup) -> SignalResult:
    """Count ``<h2>`` headings whose text ends in ``?``.

    The signal passes with two or more question headings; points equal the
    number of question headings, capped at 3.
    """
    q_count = 0
    for heading in soup.find_all("h2"):
        heading_text = heading.get_text(strip=True).rstrip()
        if heading_text.endswith("?"):
            q_count += 1

    ok = q_count >= 2
    if q_count:
        detail = f"{q_count} H2s end with a question mark."
    else:
        detail = "No question-style H2s."
    if ok:
        fix = None
    else:
        fix = "Convert ≥2 H2s to actual user questions. AI Overviews extract Q&A patterns disproportionately."
    return SignalResult(
        "Question-style H2s", "schema_structure", ok, 3, min(3, q_count),
        detail,
        fix,
    )
358
+
359
+
360
+ # Content / format
361
def _word_count(soup: BeautifulSoup) -> int:
    """Count words in the page's visible body text.

    Script, style, noscript, header, footer and nav elements are excluded
    from the count. The pruning is done on a copy so the caller's ``soup`` is
    left intact: the same tree is reused by other checks (link counts, script
    counts) that must still see those elements.

    Returns:
        Number of ``\\b\\w+\\b`` matches in the remaining text.
    """
    import copy  # local import: only needed here

    # copy.copy() on a BeautifulSoup object yields an independent tree.
    scratch = copy.copy(soup)
    for tag in scratch(["script", "style", "noscript", "header", "footer", "nav"]):
        tag.decompose()
    text = scratch.get_text(separator=" ", strip=True)
    return len(re.findall(r"\b\w+\b", text))
366
+
367
+
368
def _check_word_count(wc: int) -> SignalResult:
    """Grade a word count against the 1200–3500 target band.

    * 1200–3500 words: pass, 5 points.
    * 800–1199 or 3501–5000 words: fail, 2 points.
    * anything else: fail, 0 points (reported as too thin or too long).
    """
    signal_name = "Word count (lift-worthy range)"
    bucket = "content_format"

    in_sweet_spot = 1200 <= wc <= 3500
    near_miss = 800 <= wc < 1200 or 3500 < wc <= 5000

    if in_sweet_spot:
        return SignalResult(
            signal_name, bucket, True, 5, 5,
            f"{wc} words — in the lift-worthy range (1200–3500).",
            None,
        )
    if near_miss:
        return SignalResult(
            signal_name, bucket, False, 5, 2,
            f"{wc} words — outside the sweet spot (1200–3500).",
            "Aim for 1500–2500 words. Thin pages and bloated pages both lose extraction priority.",
        )

    verdict = "thin" if wc < 800 else "long"
    return SignalResult(
        signal_name, bucket, False, 5, 0,
        f"{wc} words — too {verdict}.",
        "Aim for 1500–2500 words with one clear answer per section.",
    )
385
+
386
+
387
def _check_first_paragraph_answer(soup: BeautifulSoup) -> SignalResult:
    """Check that the first real paragraph is a short, complete answer.

    The content region is the first of ``<article>``, ``<main>`` or
    ``<body>``. The first ``<p>`` with at least 8 words is examined; it passes
    when it has 15–60 words and ends in ``.``, ``!`` or ``?``.

    Points: 5 on pass; otherwise 2 when the paragraph has at least 15 words,
    1 when it is shorter, and 0 when no region or paragraph is found.
    """
    article = soup.find("article") or soup.find("main") or soup.body
    if not article:
        return SignalResult(
            "Answer in first 60 words", "content_format", False, 5, 0,
            "Could not detect a main content region.",
            "Wrap the article body in `<article>` or `<main>` for clean extraction.",
        )

    # Skip empty / very-short paragraphs (captions, bylines, image text) before finding the real first paragraph
    first_text = ""
    first_word_n = 0
    for p in article.find_all("p"):
        text = p.get_text(strip=True)
        if not text:
            continue
        words = re.findall(r"\b\w+\b", text)
        if len(words) < 8:
            continue
        first_text = text
        first_word_n = len(words)
        break

    if first_word_n == 0:
        return SignalResult(
            "Answer in first 60 words", "content_format", False, 5, 0,
            "No non-trivial first paragraph found in main content.",
            "Open with a real <p> of 15–60 words that directly answers the page's question.",
        )

    in_range = 15 <= first_word_n <= 60
    ends_cleanly = first_text.endswith((".", "!", "?"))
    ok = in_range and ends_cleanly

    if ok:
        detail = f"First real paragraph: {first_word_n} words."
        fix = None
    elif in_range:
        # Length is fine; the failure is the missing terminal punctuation, so
        # do not report the paragraph as being outside the word range.
        detail = (
            f"First real paragraph is {first_word_n} words but does not end with "
            "terminal punctuation (. ! ?)."
        )
        fix = "End the first paragraph as a complete sentence so it reads as a direct answer to the page's core question."
    else:
        detail = f"First real paragraph is {first_word_n} words — outside the AI-lift sweet spot of 15–60."
        fix = "Cut the intro. The first paragraph should be a 15–60 word direct answer to the page's core question."

    return SignalResult(
        "Answer in first 60 words", "content_format", ok, 5,
        5 if ok else (2 if first_word_n >= 15 else 1),
        detail,
        fix,
    )
424
+
425
+
426
def _check_tldr(soup: BeautifulSoup) -> SignalResult:
    """Look for a TL;DR / summary marker in the first 2000 characters of page text."""
    page_text = soup.get_text(separator=" ", strip=True).lower()
    opening = page_text[:2000]
    marker = re.search(r"\btl;dr\b|\btldr\b|\bin short\b|\bsummary\b", opening)
    if marker is not None:
        return SignalResult(
            "TL;DR / summary block near top", "content_format", True, 5, 5,
            "TL;DR or summary detected near the top of the page.",
            None,
        )
    return SignalResult(
        "TL;DR / summary block near top", "content_format", False, 5, 0,
        "No TL;DR or summary block found.",
        "Add a 50-word TL;DR after the H1. AI summarizers lift TL;DR blocks at much higher rates.",
    )
434
+
435
+
436
def _check_bold_answer(soup: BeautifulSoup) -> SignalResult:
    """Check whether the first real paragraph contains bold emphasis.

    The content region is the first of ``<article>``, ``<main>`` or
    ``<body>``. The paragraph inspected is the first ``<p>`` with at least
    8 words, so empty paragraphs, captions and bylines do not mask the real
    opening paragraph; if no paragraph is that long, the very first ``<p>``
    is used. Passes (5 of 5 points) when that paragraph contains a
    ``<strong>`` or ``<b>`` descendant.
    """
    article = soup.find("article") or soup.find("main") or soup.body
    if not article:
        return SignalResult("Bold answer in first section", "content_format", False, 5, 0,
                            "No main content region detected.", None)

    paragraphs = article.find_all("p")
    if not paragraphs:
        return SignalResult("Bold answer in first section", "content_format", False, 5, 0,
                            "No paragraph found.", None)

    # Same 8-word threshold the first-paragraph answer check uses to skip
    # trivial paragraphs; fall back to the literal first <p>.
    first_p = next(
        (p for p in paragraphs
         if len(re.findall(r"\b\w+\b", p.get_text(strip=True))) >= 8),
        paragraphs[0],
    )

    has_bold = bool(first_p.find(["strong", "b"]))
    return SignalResult(
        "Bold answer in first section", "content_format", has_bold, 5, 5 if has_bold else 0,
        "First paragraph contains a `<strong>` or `<b>` tag." if has_bold
        else "First paragraph has no bold emphasis.",
        None if has_bold else "Bold the literal answer in the first paragraph. Visual emphasis = extraction signal.",
    )
452
+
453
+
454
def _check_lists(soup: BeautifulSoup) -> SignalResult:
    """Count ``<ol>``/``<ul>`` elements.

    The signal passes with three or more lists; points equal the number of
    lists, capped at 5.
    """
    list_count = len(soup.find_all(["ol", "ul"]))
    ok = list_count >= 3

    if list_count:
        detail = f"{list_count} list elements."
    else:
        detail = "No ordered/unordered lists."
    if ok:
        fix = None
    else:
        fix = "Use real list markup (3+ ol/ul) — AI Overviews favor structured enumeration."

    return SignalResult(
        "Structured lists", "content_format", ok, 5, min(5, list_count),
        detail,
        fix,
    )
462
+
463
+
464
+ # Authority
465
def _check_external_authority_links(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Count outbound links that point at authority domains.

    A link counts when it is an absolute ``http``-prefixed URL on a host other
    than ``page_host`` and its hostname ends in ``.gov``, ``.edu`` or
    ``.ac.uk``, or is ``wikipedia.org`` or one of its subdomains. The signal
    passes with two or more such links; points equal the count, capped at 5.

    Args:
        soup: Parsed page.
        page_host: Lower-cased host of the page being scored; links to it are
            not treated as external.
    """
    authority_suffixes = (".gov", ".edu", ".ac.uk")
    authority_domains = ("wikipedia.org",)

    external_auth = 0
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if not href.startswith("http"):
            continue
        try:
            parsed = urlparse(href)
            netloc = parsed.netloc.lower()
            # hostname drops any userinfo/port, so "www.nih.gov:443" still
            # matches the ".gov" suffix.
            host = (parsed.hostname or "").lower()
        except ValueError:
            # urlparse rejects some malformed netlocs; ignore those links.
            continue
        if not host or page_host in (netloc, host):
            continue
        is_authority = host.endswith(authority_suffixes) or any(
            # Require a label boundary so "notwikipedia.org" does not match.
            host == domain or host.endswith("." + domain)
            for domain in authority_domains
        )
        if is_authority:
            external_auth += 1

    ok = external_auth >= 2
    return SignalResult(
        "Authority outbound links", "authority", ok, 5, min(5, external_auth),
        f"{external_auth} link(s) to .gov / .edu / Wikipedia." if external_auth
        else "No outbound links to authority domains.",
        None if ok else "Cite 2+ authority sources (.gov, .edu, Wikipedia). Provenance is an extraction signal.",
    )
488
+
489
+
490
def _check_internal_links(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Count links that stay on the page's own host.

    A link is internal when its href is root-relative (starts with a single
    ``/``) or is an ``http``-prefixed URL whose netloc equals ``page_host``.
    The signal passes when 5–50 internal links are found; outside that range
    it earns 2 points if any internal link exists, else 0.
    """

    def _is_internal(href) -> bool:
        # Root-relative path, but not a protocol-relative "//host/..." URL.
        if href.startswith("/"):
            return not href.startswith("//")
        if not href.startswith("http"):
            return False
        try:
            return urlparse(href).netloc.lower() == page_host
        except ValueError:
            return False

    internal = sum(1 for anchor in soup.find_all("a", href=True) if _is_internal(anchor["href"]))

    ok = 5 <= internal <= 50
    if ok:
        points = 4
    elif internal:
        points = 2
    else:
        points = 0
    detail = f"{internal} internal links." if internal else "No internal links detected."
    fix = None if ok else "5–50 internal links is the healthy range. Below = orphan; above = link soup."
    return SignalResult(
        "Internal linking", "authority", ok, 4, points,
        detail,
        fix,
    )
509
+
510
+
511
def _check_author_byline(soup: BeautifulSoup, jsonld: list[dict]) -> SignalResult:
    """Detect a named author via JSON-LD or a visible byline.

    First looks for a truthy ``author`` field on any JSON-LD node (nodes of
    ``@graph`` when present, else the document itself). Failing that, searches
    the first 3000 characters of lower-cased page text for a byline pattern.
    """
    # Structured data: any node carrying an author value.
    has_author = any(
        isinstance(node, dict) and node.get("author")
        for entry in jsonld
        for node in entry.get("@graph", [entry])
    )

    # Fallback: visible byline text near the top of the page.
    if not has_author:
        leading_text = soup.get_text(separator=" ", strip=True).lower()[:3000]
        byline = re.search(r"\bby [a-z]+\s+[a-z]+\b|written by\b|author:", leading_text)
        has_author = byline is not None

    if has_author:
        return SignalResult(
            "Named author / byline", "authority", True, 5, 5,
            "Author named (schema or visible byline).",
            None,
        )
    return SignalResult(
        "Named author / byline", "authority", False, 5, 0,
        "No author byline detected.",
        "Name a real author with a profile page. E-E-A-T's first E = experience, and that means a person.",
    )
531
+
532
+
533
def _check_external_link_density(soup: BeautifulSoup, page_host: str) -> SignalResult:
    """Count ``http``-prefixed links whose netloc differs from ``page_host``.

    The signal passes with 3–30 outbound links (3 points); otherwise it earns
    1 point.
    """

    def _is_outbound(href) -> bool:
        if not href.startswith("http"):
            return False
        try:
            return urlparse(href).netloc.lower() != page_host
        except ValueError:
            return False

    external = sum(1 for anchor in soup.find_all("a", href=True) if _is_outbound(anchor["href"]))

    ok = 3 <= external <= 30
    points = 3 if ok else 1
    detail = f"{external} outbound links." if external else "No outbound links."
    fix = None if ok else "Cite sources liberally (3–30 outbound). AI prioritizes content with clear provenance."
    return SignalResult(
        "Outbound link density", "authority", ok, 3, points,
        detail,
        fix,
    )
550
+
551
+
552
def _check_reviews_or_quotes(soup: BeautifulSoup) -> SignalResult:
    """Pass (3 of 3 points) when the page has at least one ``<blockquote>`` or ``<cite>``."""
    quote_n = len(soup.find_all("blockquote"))
    cite_n = len(soup.find_all("cite"))
    if quote_n + cite_n >= 1:
        return SignalResult(
            "Quotes & citations markup", "authority", True, 3, 3,
            f"{quote_n} `<blockquote>`, {cite_n} `<cite>`.",
            None,
        )
    return SignalResult(
        "Quotes & citations markup", "authority", False, 3, 0,
        "No `<blockquote>` or `<cite>` tags.",
        "Wrap quotes in `<blockquote>`. AI summarizers credit quoted sources back to the original.",
    )
562
+
563
+
564
+ # Freshness
565
def _check_last_modified(response, jsonld: list[dict]) -> SignalResult:
    """Detect a modification date from JSON-LD or the HTTP response.

    Passes (4 of 4 points) when either the ``last-modified`` response header
    or a truthy ``dateModified`` on any JSON-LD node is present. The detail
    text prefers the JSON-LD value over the header.
    """
    # HTTP header, when a response object is available.
    lm = response.headers.get("last-modified") if response else None

    # First truthy dateModified across all JSON-LD nodes.
    date_modified = next(
        (
            node["dateModified"]
            for entry in jsonld
            for node in entry.get("@graph", [entry])
            if isinstance(node, dict) and node.get("dateModified")
        ),
        None,
    )

    has = bool(date_modified or lm)
    if date_modified:
        detail = f"dateModified: {date_modified}"
    elif lm:
        detail = f"Last-Modified: {lm}"
    else:
        detail = "No modification date detected."

    if has:
        return SignalResult("Last modified date", "freshness", True, 4, 4, detail, None)
    return SignalResult(
        "Last modified date", "freshness", False, 4, 0,
        detail,
        "Expose a dateModified in JSON-LD or via Last-Modified header. AI prioritizes fresh content.",
    )
586
+
587
+
588
def _check_dated_claims(soup: BeautifulSoup) -> SignalResult:
    """Look for explicit dated phrases in the page text.

    Matches "in", "as of", "updated" or "since" followed by either a month
    name or one of the four most recent calendar years (current UTC year and
    the three before it), case-insensitively. Passes (3 of 3 points) on any
    match.
    """
    text = soup.get_text(separator=" ", strip=True)

    # Derive the accepted years from the clock so the check does not go stale
    # when the calendar rolls over.
    this_year = time.gmtime().tm_year
    recent_years = "|".join(str(y) for y in range(this_year - 3, this_year + 1))
    months = (
        "january|february|march|april|may|june|july|august|"
        "september|october|november|december"
    )
    pattern = rf"\b(in|as of|updated|since)\s+({recent_years}|{months})\b"
    has_date_phrases = bool(re.search(pattern, text, re.I))

    return SignalResult(
        "Dated claims in body", "freshness", has_date_phrases, 3, 3 if has_date_phrases else 0,
        "Body contains explicit dated phrases." if has_date_phrases else "No dated phrases in body text.",
        # "{month}" is a literal placeholder shown to the user; only the year is filled in.
        None if has_date_phrases
        else f"Use 'As of {{month}} {this_year}' on every claim. Undated content reads stale to AI models.",
    )
599
+
600
+
601
def _check_year_in_title(soup: BeautifulSoup) -> SignalResult:
    """Check whether the page's H1 (or ``<title>`` if there is no H1) names a recent year.

    A recent year is the current UTC year or either of the two before it.
    Passes (3 of 3 points) when one appears as a standalone number.
    """
    title_tag = soup.find("h1") or soup.find("title")
    title = title_tag.get_text(strip=True) if title_tag else ""

    # Accepted years follow the clock rather than a hard-coded range.
    this_year = time.gmtime().tm_year
    recent_years = "|".join(str(y) for y in range(this_year - 2, this_year + 1))
    has_year = bool(re.search(rf"\b({recent_years})\b", title))

    return SignalResult(
        "Year in title", "freshness", has_year, 3, 3 if has_year else 0,
        "Title contains a year." if has_year else "No year in H1/title.",
        None if has_year
        else f"If the content is time-sensitive, include the year in the title (e.g. '... in {this_year}').",
    )
610
+
611
+
612
+ # ============ ORCHESTRATOR ============
613
+
614
def score_url(url: str) -> ScoreResult:
    """Fetch ``url`` and score it across all signal buckets.

    The URL is normalized and fetched; on fetch failure the returned result
    carries the error in ``notes``, a grade of ``"—"`` and a score of 0.
    Otherwise every signal check is run, points are summed per bucket and
    rescaled to that bucket's cap in ``WEIGHTS``, and the rounded total
    (clamped to 0–100) becomes the score.

    Args:
        url: Address of the page to score.

    Returns:
        A ``ScoreResult`` with score, grade, per-bucket summary, the full
        signal list and up to ten fix suggestions (in signal order).
    """
    url = _norm_url(url)
    result = ScoreResult(
        url=url,
        score=0,
        grade="F",
        fetched_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        page_title=None,
    )

    response, err = _fetch(url)
    if response is None:
        result.notes.append(err or "Fetch failed for unknown reason.")
        result.grade = "—"
        return result

    soup = BeautifulSoup(response.text, "lxml")
    parsed_url = urlparse(str(response.url))
    page_host = parsed_url.netloc.lower()

    title_tag = soup.find("title")
    result.page_title = title_tag.get_text(strip=True) if title_tag else None

    jsonld = _extract_jsonld(soup)
    schema_set = _schema_types(jsonld)

    # Tally raw markup BEFORE the word count runs: these numbers must describe
    # the document as fetched, independent of any element pruning performed
    # while extracting body text.
    script_count = len(soup.find_all("script"))
    semantic_p = len(soup.find_all("p"))
    wc = _word_count(soup)

    # Detect likely JS-rendered SPA: lots of script tags + very few semantic content tags.
    # If we see ≥3 scripts but fewer than 3 <p> tags AND fewer than 200 words of body text,
    # this page is almost certainly client-side rendered.
    if script_count >= 3 and semantic_p < 3 and wc < 200:
        result.notes.append(
            "⚠ This page appears to be JavaScript-rendered. AI crawlers see what we see — not what you see in your browser. "
            "The score reflects what's actually in the raw HTML response."
        )

    signals: list[SignalResult] = [
        # Crawlability bucket (15 pts max)
        _check_https(parsed_url),
        _check_canonical(soup, url),
        _check_viewport(soup),
        _check_open_graph(soup),
        _check_llms_txt(parsed_url),
        _check_robots_ai(parsed_url),

        # Schema & structure (30 pts max)
        _check_article_schema(schema_set),
        _check_faq_schema(schema_set),
        _check_howto_schema(schema_set),
        _check_breadcrumb_schema(schema_set),
        _check_heading_structure(soup),
        _check_definition_lists(soup),
        _check_tables(soup),
        _check_question_h2s(soup),

        # Content / format (25 pts max)
        _check_word_count(wc),
        _check_first_paragraph_answer(soup),
        _check_tldr(soup),
        _check_bold_answer(soup),
        _check_lists(soup),

        # Authority (20 pts max)
        _check_external_authority_links(soup, page_host),
        _check_internal_links(soup, page_host),
        _check_author_byline(soup, jsonld),
        _check_external_link_density(soup, page_host),
        _check_reviews_or_quotes(soup),

        # Freshness (10 pts max)
        _check_last_modified(response, jsonld),
        _check_dated_claims(soup),
        _check_year_in_title(soup),
    ]

    # Aggregate per bucket
    bucket_raw: dict[str, list[SignalResult]] = {b: [] for b in WEIGHTS}
    for s in signals:
        bucket_raw[s.bucket].append(s)

    bucket_summary: dict[str, dict[str, float]] = {}
    total_points = 0.0
    for bucket, weight_max in WEIGHTS.items():
        bs = bucket_raw[bucket]
        # "or 1" keeps the division safe for a bucket with no signals.
        bucket_weight = sum(s.weight for s in bs) or 1
        bucket_earned = sum(s.points for s in bs)
        # Normalize to the bucket weight cap
        scaled = (bucket_earned / bucket_weight) * weight_max
        bucket_summary[bucket] = {
            "earned": round(scaled, 1),
            "max": float(weight_max),
            "percent": round(100 * scaled / weight_max, 1) if weight_max else 0,
        }
        total_points += scaled

    score_int = max(0, min(100, round(total_points)))
    result.score = score_int
    result.grade = _grade(score_int)
    result.bucket_scores = bucket_summary
    result.signals = signals
    result.fixes = [s.fix for s in signals if s.fix][:10]  # first 10 actionable fixes, in signal order
    return result
718
+
719
+
720
+ # Convenience for serverless / CLI
721
def score_url_json(url: str) -> str:
    """Score ``url`` and return the result's ``to_dict()`` form as 2-space-indented JSON."""
    report = score_url(url)
    payload = report.to_dict()
    return json.dumps(payload, indent=2)
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "oryon-score"
7
+ version = "0.1.0"
8
+ description = "Score any URL for AI search readiness. Free open-source tool by Oryon."
9
+ readme = "README.md"
10
+ authors = [{ name = "Amaury", email = "amaury@seoryon.com" }]
11
+ license = { text = "MIT" }
12
+ requires-python = ">=3.10"
13
+ keywords = ["seo", "ai-search", "ai-overview", "geo", "aeo", "llm-citation", "schema", "audit"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Internet :: WWW/HTTP",
23
+ ]
24
+ dependencies = [
25
+ "httpx>=0.27",
26
+ "beautifulsoup4>=4.12",
27
+ "lxml>=5.0",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://seoryon.com"
32
+ Repository = "https://github.com/SEOryon/oryon-score"
33
+ Issues = "https://github.com/SEOryon/oryon-score/issues"
34
+
35
+ [project.scripts]
36
+ oryon-score = "oryon_score.cli:main"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["oryon_score"]
40
+
41
+ [tool.hatch.build.targets.sdist]
42
+ include = [
43
+ "/oryon_score",
44
+ "/README.md",
45
+ "/LICENSE",
46
+ "/pyproject.toml",
47
+ ]