@braedenbuilds/crawl-sim 1.0.5 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +15 -0
- package/.claude-plugin/plugin.json +13 -0
- package/README.md +32 -9
- package/bin/install.js +6 -2
- package/package.json +8 -3
- package/{SKILL.md → skills/crawl-sim/SKILL.md} +23 -2
- package/{scripts → skills/crawl-sim/scripts}/_lib.sh +30 -0
- package/skills/crawl-sim/scripts/compute-score.sh +744 -0
- package/{scripts → skills/crawl-sim/scripts}/extract-jsonld.sh +12 -0
- package/skills/crawl-sim/scripts/fetch-as-bot.sh +151 -0
- package/skills/crawl-sim/scripts/schema-fields.sh +25 -0
- package/scripts/compute-score.sh +0 -424
- package/scripts/fetch-as-bot.sh +0 -87
- /package/{profiles → skills/crawl-sim/profiles}/chatgpt-user.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/claude-searchbot.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/claude-user.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/claudebot.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/googlebot.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/gptbot.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/oai-searchbot.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/perplexity-user.json +0 -0
- /package/{profiles → skills/crawl-sim/profiles}/perplexitybot.json +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/check-llmstxt.sh +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/check-robots.sh +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/check-sitemap.sh +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/diff-render.sh +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/extract-links.sh +0 -0
- /package/{scripts → skills/crawl-sim/scripts}/extract-meta.sh +0 -0
|
@@ -0,0 +1,744 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -eu
|
|
3
|
+
|
|
4
|
+
# compute-score.sh — Aggregate check outputs into per-bot + per-category scores
|
|
5
|
+
# Usage: compute-score.sh [--page-type <type>] <results-dir>
|
|
6
|
+
# Output: JSON to stdout
|
|
7
|
+
#
|
|
8
|
+
# Expected filenames in <results-dir>:
|
|
9
|
+
# fetch-<bot_id>.json — fetch-as-bot.sh output
|
|
10
|
+
# meta-<bot_id>.json — extract-meta.sh output
|
|
11
|
+
# jsonld-<bot_id>.json — extract-jsonld.sh output
|
|
12
|
+
# links-<bot_id>.json — extract-links.sh output
|
|
13
|
+
# robots-<bot_id>.json — check-robots.sh output
|
|
14
|
+
# llmstxt.json — check-llmstxt.sh output (bot-independent)
|
|
15
|
+
# sitemap.json — check-sitemap.sh output (bot-independent)
|
|
16
|
+
# diff-render.json — diff-render.sh output (optional, Googlebot only)
|
|
17
|
+
#
|
|
18
|
+
# The --page-type flag overrides URL-based page-type detection. Valid values:
|
|
19
|
+
# root, detail, archive, faq, about, contact, generic.
|
|
20
|
+
|
|
21
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
22
|
+
# shellcheck source=_lib.sh
|
|
23
|
+
. "$SCRIPT_DIR/_lib.sh"
|
|
24
|
+
# shellcheck source=schema-fields.sh
|
|
25
|
+
. "$SCRIPT_DIR/schema-fields.sh"
|
|
26
|
+
|
|
27
|
+
# Command-line parsing. Consumes leading flags and stops at the first
# positional argument (or after a literal "--"), leaving the results
# directory in $1. PAGE_TYPE_OVERRIDE stays empty when no override is given.
PAGE_TYPE_OVERRIDE=""
until [ $# -eq 0 ]; do
  case "$1" in
    --page-type=*)
      # Inline form: strip the "--page-type=" prefix to get the value.
      PAGE_TYPE_OVERRIDE="${1#--page-type=}"
      shift
      ;;
    --page-type)
      # Two-word form: the value is the following argument.
      if [ $# -lt 2 ]; then
        echo "--page-type requires a value" >&2
        exit 2
      fi
      PAGE_TYPE_OVERRIDE="$2"
      shift 2
      ;;
    --help | -h)
      echo "Usage: compute-score.sh [--page-type <type>] <results-dir>"
      exit 0
      ;;
    --)
      # Explicit end of options.
      shift
      break
      ;;
    -*)
      echo "Unknown flag: $1" >&2
      exit 2
      ;;
    *)
      # First positional argument: leave it in place for the caller below.
      break
      ;;
  esac
done
|
|
56
|
+
|
|
57
|
+
# The first remaining positional argument is mandatory; ${1:?...} aborts with
# the usage text when it is missing or empty.
RESULTS_DIR="${1:?Usage: compute-score.sh [--page-type <type>] <results-dir>}"

# An empty override means "auto-detect"; anything else must be a known type.
case "$PAGE_TYPE_OVERRIDE" in
  "" | root | detail | archive | faq | about | contact | generic) ;;
  *)
    echo "Error: invalid --page-type '$PAGE_TYPE_OVERRIDE' (valid: root, detail, archive, faq, about, contact, generic)" >&2
    exit 2
    ;;
esac

# Progress goes to stderr so stdout carries only the final JSON report.
printf '[compute-score] aggregating %s\n' "$RESULTS_DIR" >&2

[ -d "$RESULTS_DIR" ] || {
  echo "Error: results dir not found: $RESULTS_DIR" >&2
  exit 1
}
|
|
75
|
+
|
|
76
|
+
# Category weights (as percentages of per-bot composite). The five values sum
# to 100. They are constants, so they are marked readonly to catch accidental
# reassignment later in the script.
readonly W_ACCESSIBILITY=25
readonly W_CONTENT=30
readonly W_STRUCTURED=20
readonly W_TECHNICAL=15
readonly W_AI=10
|
|
82
|
+
|
|
83
|
+
# Overall composite weights (per bot).
# Arguments: $1 - bot id (e.g. "googlebot")
# Outputs:   the bot's weight in the overall score; 0 for any bot id that is
#            not listed here.
overall_weight() {
  local weight=0
  case "$1" in
    googlebot)
      weight=40
      ;;
    gptbot | claudebot | perplexitybot)
      weight=20
      ;;
  esac
  echo "$weight"
}
|
|
93
|
+
|
|
94
|
+
# Map an integer score to a letter grade.
# Arguments: $1 - integer score
# Outputs:   one of A, A-, B+, B, B-, C+, C, C-, D+, D, D-, F
grade_for() {
  local score=$1
  local band
  # Each entry is "<minimum score>:<grade>", highest band first; the first
  # band whose minimum the score reaches wins.
  for band in 93:A 90:A- 87:B+ 83:B 80:B- 77:C+ 73:C 70:C- 67:D+ 63:D 60:D-; do
    if [ "$score" -ge "${band%%:*}" ]; then
      echo "${band#*:}"
      return
    fi
  done
  echo "F"
}
|
|
110
|
+
|
|
111
|
+
# Rubric: schema types a page of the given type is expected to carry.
# Arguments: $1 - page type
# Outputs:   space-separated schema type names; unrecognised page types get
#            the generic WebPage + BreadcrumbList expectation.
rubric_expected() {
  local types
  case "$1" in
    root) types="Organization WebSite" ;;
    detail) types="Article BreadcrumbList" ;;
    archive) types="CollectionPage ItemList BreadcrumbList" ;;
    faq) types="FAQPage BreadcrumbList" ;;
    about) types="AboutPage BreadcrumbList Organization" ;;
    contact) types="ContactPage BreadcrumbList" ;;
    *) types="WebPage BreadcrumbList" ;;
  esac
  echo "$types"
}
|
|
123
|
+
|
|
124
|
+
# Rubric: schema types that earn a bonus when present on the given page type.
# Arguments: $1 - page type
# Outputs:   space-separated schema type names, or an empty line when the
#            page type has no optional schemas.
rubric_optional() {
  local types=""
  case "$1" in
    root) types="ProfessionalService LocalBusiness" ;;
    detail) types="NewsArticle ImageObject Person" ;;
    faq) types="WebPage" ;;
    about) types="Person" ;;
    contact) types="PostalAddress" ;;
  esac
  echo "$types"
}
|
|
135
|
+
|
|
136
|
+
# Rubric: schema types that are penalised when present on the given page type.
# Arguments: $1 - page type
# Outputs:   space-separated schema type names, or an empty line when nothing
#            is forbidden for that page type.
rubric_forbidden() {
  local types=""
  case "$1" in
    root) types="BreadcrumbList Article FAQPage" ;;
    detail) types="CollectionPage ItemList" ;;
    faq) types="Article CollectionPage" ;;
    archive | about | contact) types="Article Product" ;;
  esac
  echo "$types"
}
|
|
147
|
+
|
|
148
|
+
# Test whether a word appears among the remaining arguments.
# Arguments: $1 - word to look for; $2.. - candidate words
# Returns:   0 when an exact string match is found, 1 otherwise
list_contains() {
  local wanted="$1" candidate
  shift
  for candidate; do
    if [ "$candidate" = "$wanted" ]; then
      return 0
    fi
  done
  return 1
}
|
|
157
|
+
|
|
158
|
+
# Count the whitespace-separated words in a list string.
# Arguments: $1 - space-separated list (may be empty)
# Outputs:   the number of words
list_count() {
  local word total=0
  # Deliberately unquoted: word splitting is what separates the items.
  # shellcheck disable=SC2086
  for word in $1; do
    total=$((total + 1))
  done
  echo "$total"
}
|
|
163
|
+
|
|
164
|
+
# Intersection of two space-separated lists.
# Arguments: $1 - list A; $2 - list B
# Outputs:   the words of A that also occur in B, in A's order, joined by
#            single spaces, with no trailing newline
list_intersect() {
  local a="$1" b="$2"
  local kept="" item candidate
  # Both lists are split on whitespace on purpose.
  # shellcheck disable=SC2086
  for item in $a; do
    for candidate in $b; do
      if [ "$candidate" = "$item" ]; then
        kept="${kept:+$kept }$item"
        break
      fi
    done
  done
  printf '%s' "$kept"
}
|
|
176
|
+
|
|
177
|
+
# Difference of two space-separated lists.
# Arguments: $1 - list A; $2 - list B
# Outputs:   the words of A that do not occur in B, in A's order, joined by
#            single spaces, with no trailing newline
list_diff() {
  local a="$1" b="$2"
  local kept="" item candidate
  # Both lists are split on whitespace on purpose.
  # shellcheck disable=SC2086
  for item in $a; do
    for candidate in $b; do
      # Found in B: skip to the next word of A without keeping this one.
      [ "$candidate" = "$item" ] && continue 2
    done
    kept="${kept:+$kept }$item"
  done
  printf '%s' "$kept"
}
|
|
189
|
+
|
|
190
|
+
# Read one value from a JSON file with jq, falling back to a default.
# Arguments: $1 - path to the JSON file
#            $2 - jq query (e.g. '.bot.name')
#            $3 - default value; the literal string "null" when omitted
# Outputs:   the raw (-r) query result, or the default when the file is
#            missing, jq fails, or the query yields null/false (the jq `//`
#            alternative operator).
jget() {
  local file="$1"
  local query="$2"
  # ${3-null}, not ${3:-null}: an explicitly passed empty default must stay
  # empty instead of being turned into the string "null".
  local default="${3-null}"
  local result
  # Capture jq's output so a run that fails part-way cannot emit partial
  # output followed by the default.
  if [ -f "$file" ] && result=$(jq -r --arg d "$default" "$query // \$d" "$file" 2>/dev/null); then
    printf '%s\n' "$result"
  else
    printf '%s\n' "$default"
  fi
}
|
|
200
|
+
|
|
201
|
+
# Read a numeric value from a JSON file via jget.
# Arguments: $1 - path to the JSON file; $2 - jq query
# Outputs:   the value when it looks like an optionally signed integer or
#            decimal; otherwise "0"
jget_num() {
  local raw
  raw=$(jget "$1" "$2" "0")
  if printf '%s' "$raw" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then
    echo "$raw"
  else
    echo "0"
  fi
}
|
|
210
|
+
|
|
211
|
+
# Read a boolean from a JSON file via jget.
# Arguments: $1 - path to the JSON file; $2 - jq query
# Outputs:   "true" only when the value is exactly true; "false" otherwise
jget_bool() {
  local raw
  raw=$(jget "$1" "$2" "false")
  case "$raw" in
    true) echo "true" ;;
    *) echo "false" ;;
  esac
}
|
|
216
|
+
|
|
217
|
+
# Discover which bots were crawled from the fetch-<bot_id>.json files in the
# results directory. BOTS becomes a space-separated list of bot ids and
# FIRST_FETCH the first matching file (used later to read the target URL).
BOTS=""
FIRST_FETCH=""
for f in "$RESULTS_DIR"/fetch-*.json; do
  # An unmatched glob stays literal, so skip anything that is not a file.
  if [ ! -f "$f" ]; then
    continue
  fi
  if [ -z "$FIRST_FETCH" ]; then
    FIRST_FETCH="$f"
  fi
  # Reduce ".../fetch-<bot_id>.json" to "<bot_id>".
  bot_id=${f##*/}
  bot_id=${bot_id%.json}
  bot_id=${bot_id#fetch-}
  BOTS="$BOTS $bot_id"
done

if [ -z "$BOTS" ]; then
  echo "Error: no fetch-*.json files found in $RESULTS_DIR" >&2
  exit 1
fi
|
|
230
|
+
|
|
231
|
+
# Bot-independent inputs.
LLMSTXT_FILE="$RESULTS_DIR/llmstxt.json"
SITEMAP_FILE="$RESULTS_DIR/sitemap.json"
DIFF_RENDER_FILE="$RESULTS_DIR/diff-render.json"

# Render-diff data is optional. It counts as available only when the file
# exists and its .skipped field is explicitly false; a missing field or a jq
# failure is treated as skipped.
DIFF_AVAILABLE=false
DIFF_RENDERED_WORDS=0
DIFF_DELTA_PCT=0
if [ -f "$DIFF_RENDER_FILE" ]; then
  DIFF_SKIPPED=$(jq -r '.skipped | if . == null then "true" else tostring end' "$DIFF_RENDER_FILE" 2>/dev/null || echo "true")
  case "$DIFF_SKIPPED" in
    false)
      DIFF_AVAILABLE=true
      DIFF_RENDERED_WORDS=$(jq -r '.renderedWordCount // 0' "$DIFF_RENDER_FILE")
      DIFF_DELTA_PCT=$(jq -r '.deltaPct // 0' "$DIFF_RENDER_FILE")
      ;;
  esac
fi
|
|
246
|
+
|
|
247
|
+
# Resolve the page type once: an explicit --page-type wins, otherwise it is
# derived from the URL recorded in the first fetch file.
TARGET_URL=$(jget "$FIRST_FETCH" '.url' "")
PAGE_TYPE="$PAGE_TYPE_OVERRIDE"
if [ -z "$PAGE_TYPE" ]; then
  PAGE_TYPE=$(page_type_for_url "$TARGET_URL")
fi
printf '[compute-score] page type: %s (url: %s)\n' "$PAGE_TYPE" "$TARGET_URL" >&2

# Structured-data rubric for this page type; it is the same for every bot.
RUBRIC_EXPECTED="$(rubric_expected "$PAGE_TYPE")"
RUBRIC_OPTIONAL="$(rubric_optional "$PAGE_TYPE")"
RUBRIC_FORBIDDEN="$(rubric_forbidden "$PAGE_TYPE")"
EXPECTED_COUNT=$(list_count "$RUBRIC_EXPECTED")

# Per-bot result objects, keyed by bot id.
BOTS_JSON="{}"

# Running totals for the cross-bot category averages.
CAT_N=0
CAT_ACCESSIBILITY_SUM=0
CAT_CONTENT_SUM=0
CAT_STRUCTURED_SUM=0
CAT_TECHNICAL_SUM=0
CAT_AI_SUM=0

# Running totals for the weighted overall score.
OVERALL_WEIGHTED_SUM=0
OVERALL_WEIGHT_TOTAL=0
|
|
272
|
+
|
|
273
|
+
for bot_id in $BOTS; do
|
|
274
|
+
# Per-bot input files written by the individual check scripts.
FETCH="$RESULTS_DIR/fetch-$bot_id.json"
META="$RESULTS_DIR/meta-$bot_id.json"
JSONLD="$RESULTS_DIR/jsonld-$bot_id.json"
LINKS="$RESULTS_DIR/links-$bot_id.json"
ROBOTS="$RESULTS_DIR/robots-$bot_id.json"

# Display name, falling back to the bot id.
BOT_NAME=$(jget "$FETCH" '.bot.name' "$bot_id")

# Fetch failure (AC-A3): skip all scoring and record an all-F result.
# NOTE(review): a failed bot is counted in CAT_N (so it lowers the category
# averages) but adds nothing to the OVERALL_* weight totals — confirm that
# this asymmetry is intended.
FETCH_FAILED=$(jget_bool "$FETCH" '.fetchFailed')
case "$FETCH_FAILED" in
  true)
    FETCH_ERROR=$(jget "$FETCH" '.error' "unknown error")
    RENDERS_JS=$(jq -r '.bot.rendersJavaScript | if . == null then "unknown" else tostring end' "$FETCH" 2>/dev/null || echo "unknown")
    BOT_OBJ=$(
      jq -n \
        --arg id "$bot_id" \
        --arg name "$BOT_NAME" \
        --arg rendersJs "$RENDERS_JS" \
        --arg error "$FETCH_ERROR" \
        '{
          id: $id,
          name: $name,
          rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end),
          fetchFailed: true,
          error: $error,
          score: 0,
          grade: "F",
          visibility: { serverWords: 0, effectiveWords: 0, missedWordsVsRendered: 0, hydrationPenaltyPts: 0 },
          categories: {
            accessibility: { score: 0, grade: "F" },
            contentVisibility: { score: 0, grade: "F" },
            structuredData: { score: 0, grade: "F", pageType: "unknown", expected: [], optional: [], forbidden: [], present: [], missing: [], extras: [], violations: [{ kind: "fetch_failed", impact: -100 }], calculation: "fetch failed — no data to score", notes: ("Fetch failed: " + $error) },
            technicalSignals: { score: 0, grade: "F" },
            aiReadiness: { score: 0, grade: "F" }
          }
        }'
    )
    # Store the result under this bot's id.
    BOTS_JSON=$(jq --argjson bot "$BOT_OBJ" --arg id "$bot_id" '.[$id] = $bot' <<<"$BOTS_JSON")
    printf '[compute-score] %s: fetch failed, scoring as F\n' "$bot_id" >&2
    CAT_N=$((CAT_N + 1))
    continue
    ;;
esac
|
|
314
|
+
|
|
315
|
+
# Basic facts about this bot's fetch and its robots.txt verdict.
STATUS=$(jget_num "$FETCH" '.status')
TOTAL_TIME=$(jget_num "$FETCH" '.timing.total')
SERVER_WORD_COUNT=$(jget_num "$FETCH" '.wordCount')
RENDERS_JS=$(jq -r '.bot.rendersJavaScript | if . == null then "unknown" else tostring end' "$FETCH" 2>/dev/null || echo "unknown")
ROBOTS_ALLOWED=$(jget_bool "$ROBOTS" '.allowed')

# Without render-diff data the bot is assumed to see the server HTML as-is.
EFFECTIVE_WORD_COUNT=$SERVER_WORD_COUNT
HYDRATION_PENALTY=0
MISSED_WORDS=0
case "$DIFF_AVAILABLE:$RENDERS_JS" in
  true:true)
    # JS-rendering bots are credited with the rendered word count.
    EFFECTIVE_WORD_COUNT=$DIFF_RENDERED_WORDS
    ;;
  true:false)
    # Non-rendering bots: each point of |deltaPct| above 5 costs one point,
    # capped at 15.
    ABS_DELTA=$(awk -v d="$DIFF_DELTA_PCT" 'BEGIN { printf "%d", (d < 0 ? -d : d) + 0.5 }')
    if [ "$ABS_DELTA" -gt 5 ]; then
      HYDRATION_PENALTY=$(awk -v d="$ABS_DELTA" 'BEGIN {
        p = (d - 5)
        if (p > 15) p = 15
        printf "%d", p
      }')
    fi
    # Words present only after rendering; never negative.
    MISSED_WORDS=$((DIFF_RENDERED_WORDS - SERVER_WORD_COUNT))
    if [ "$MISSED_WORDS" -lt 0 ]; then
      MISSED_WORDS=0
    fi
    ;;
esac
|
|
341
|
+
|
|
342
|
+
# --- Category 1: Accessibility (0-100) ---
# 40 pts when robots.txt allows the bot, 40 pts for an HTTP 200, and up to
# 20 pts for response time (20 below 2, 10 below 5, else 0).
ACC=0
if [ "$ROBOTS_ALLOWED" = "true" ]; then
  ACC=$((ACC + 40))
fi
if [ "$STATUS" = "200" ]; then
  ACC=$((ACC + 40))
fi
TIME_SCORE=$(awk -v t="$TOTAL_TIME" 'BEGIN { if (t < 2) print 20; else if (t < 5) print 10; else print 0 }')
ACC=$((ACC + TIME_SCORE))
|
|
348
|
+
|
|
349
|
+
# --- Category 2: Content Visibility (0-100) ---
# Gather the inputs first, then award points per signal.
H1_COUNT=$(jget_num "$META" '.headings.h1.count')
H2_COUNT=$(jget_num "$META" '.headings.h2.count')
INTERNAL_LINKS=$(jget_num "$LINKS" '.counts.internal')
IMG_TOTAL=$(jget_num "$META" '.images.total')
IMG_WITH_ALT=$(jget_num "$META" '.images.withAlt')

# Word-count tier (max 30).
if [ "$EFFECTIVE_WORD_COUNT" -ge 300 ]; then
  CONTENT=30
elif [ "$EFFECTIVE_WORD_COUNT" -ge 150 ]; then
  CONTENT=20
elif [ "$EFFECTIVE_WORD_COUNT" -ge 50 ]; then
  CONTENT=10
else
  CONTENT=0
fi

# Heading structure: 20 for any h1, 15 for any h2.
if [ "$H1_COUNT" -ge 1 ]; then
  CONTENT=$((CONTENT + 20))
fi
if [ "$H2_COUNT" -ge 1 ]; then
  CONTENT=$((CONTENT + 15))
fi

# Internal linking (max 20).
if [ "$INTERNAL_LINKS" -ge 5 ]; then
  CONTENT=$((CONTENT + 20))
elif [ "$INTERNAL_LINKS" -ge 1 ]; then
  CONTENT=$((CONTENT + 10))
fi

# Image alt coverage (max 15); a page without images gets full credit.
if [ "$IMG_TOTAL" -eq 0 ]; then
  CONTENT=$((CONTENT + 15))
else
  ALT_SCORE=$(awk -v a="$IMG_WITH_ALT" -v t="$IMG_TOTAL" 'BEGIN { printf "%d", (a / t) * 15 }')
  CONTENT=$((CONTENT + ALT_SCORE))
fi

# Apply the hydration penalty, flooring the result at zero.
CONTENT=$((CONTENT - HYDRATION_PENALTY))
if [ "$CONTENT" -lt 0 ]; then
  CONTENT=0
fi
|
|
377
|
+
|
|
378
|
+
# --- Category 3: Structured Data (0-100) ---
# score = base (share of expected schemas present)
#         + optional bonus (10 each, max 20)
#         - forbidden penalty (10 each)
#         - validity penalty (5 per invalid block, max 20)
#         - field penalty (5 per missing required field, max 30)
# clamped to [0,100].
JSONLD_COUNT=$(jget_num "$JSONLD" '.blockCount')
JSONLD_VALID=$(jget_num "$JSONLD" '.validCount')
JSONLD_INVALID=$(jget_num "$JSONLD" '.invalidCount')

# De-duplicated, space-separated list of the schema types found.
PRESENT_TYPES=""
if [ -f "$JSONLD" ]; then
  PRESENT_TYPES=$(jq -r '.types[]? // empty' "$JSONLD" 2>/dev/null | awk 'NF && !seen[$0]++' | tr '\n' ' ')
  PRESENT_TYPES=${PRESENT_TYPES% }
fi

# Classify the present types against the rubric.
RUBRIC_KNOWN="$RUBRIC_EXPECTED $RUBRIC_OPTIONAL $RUBRIC_FORBIDDEN"
PRESENT_EXPECTED=$(list_intersect "$RUBRIC_EXPECTED" "$PRESENT_TYPES")
PRESENT_OPTIONAL=$(list_intersect "$RUBRIC_OPTIONAL" "$PRESENT_TYPES")
PRESENT_FORBIDDEN=$(list_intersect "$RUBRIC_FORBIDDEN" "$PRESENT_TYPES")
MISSING_EXPECTED=$(list_diff "$RUBRIC_EXPECTED" "$PRESENT_TYPES")
EXTRAS=$(list_diff "$PRESENT_TYPES" "$RUBRIC_KNOWN")

PRESENT_EXPECTED_COUNT=$(list_count "$PRESENT_EXPECTED")
PRESENT_OPTIONAL_COUNT=$(list_count "$PRESENT_OPTIONAL")
PRESENT_FORBIDDEN_COUNT=$(list_count "$PRESENT_FORBIDDEN")

# Base: rounded percentage of expected schemas that are present.
BASE=$(awk -v h="$PRESENT_EXPECTED_COUNT" -v t="$EXPECTED_COUNT" \
  'BEGIN { if (t == 0) print 0; else printf "%d", (h / t) * 100 + 0.5 }')

BONUS=$((PRESENT_OPTIONAL_COUNT * 10))
if [ "$BONUS" -gt 20 ]; then
  BONUS=20
fi

FORBID_PENALTY=$((PRESENT_FORBIDDEN_COUNT * 10))

VALID_PENALTY=0
if [ "$JSONLD_COUNT" -gt 0 ] && [ "$JSONLD_INVALID" -gt 0 ]; then
  VALID_PENALTY=$((JSONLD_INVALID * 5))
  if [ "$VALID_PENALTY" -gt 20 ]; then
    VALID_PENALTY=20
  fi
fi

# Field-level validation (C3): every block must carry the fields that
# required_fields_for lists for its schema type.
FIELD_PENALTY=0
FIELD_VIOLATIONS_JSON="[]"
BLOCK_COUNT_FOR_FIELDS=0
if [ -f "$JSONLD" ]; then
  BLOCK_COUNT_FOR_FIELDS=$(jq 'if has("blocks") then .blocks | length else 0 end' "$JSONLD" 2>/dev/null || echo "0")
fi
if [ "$BLOCK_COUNT_FOR_FIELDS" -gt 0 ]; then
  for ((i = 0; i < BLOCK_COUNT_FOR_FIELDS; i++)); do
    BLOCK_TYPE=$(jq -r ".blocks[$i].type" "$JSONLD" 2>/dev/null || echo "")
    BLOCK_FIELDS=$(jq -r ".blocks[$i].fields[]?" "$JSONLD" 2>/dev/null | tr '\n' ' ')
    REQUIRED=$(required_fields_for "$BLOCK_TYPE")
    for field in $REQUIRED; do
      # shellcheck disable=SC2086
      if list_contains "$field" $BLOCK_FIELDS; then
        continue
      fi
      FIELD_VIOLATIONS_JSON=$(jq \
        --arg schema "$BLOCK_TYPE" --arg field "$field" \
        '. + [{kind: "missing_required_field", schema: $schema, field: $field, impact: -5}]' \
        <<<"$FIELD_VIOLATIONS_JSON")
      FIELD_PENALTY=$((FIELD_PENALTY + 5))
    done
  done
fi
if [ "$FIELD_PENALTY" -gt 30 ]; then
  FIELD_PENALTY=30
fi

STRUCTURED=$((BASE + BONUS - FORBID_PENALTY - VALID_PENALTY - FIELD_PENALTY))
if [ "$STRUCTURED" -gt 100 ]; then
  STRUCTURED=100
fi
if [ "$STRUCTURED" -lt 0 ]; then
  STRUCTURED=0
fi

# Human-readable trace of how the score was derived.
CALCULATION=$(printf 'base: %d/%d expected present = %d; +%d optional bonus; -%d forbidden penalty; -%d validity penalty; -%d field penalty; clamp [0,100] = %d' \
  "$PRESENT_EXPECTED_COUNT" "$EXPECTED_COUNT" "$BASE" \
  "$BONUS" "$FORBID_PENALTY" "$VALID_PENALTY" "$FIELD_PENALTY" "$STRUCTURED")

# Pick the single most relevant note: a clean perfect score first, then
# missing/forbidden schemas, then field and validity problems.
if [ "$STRUCTURED" -ge 100 ] && [ -z "$PRESENT_FORBIDDEN" ] && [ "$VALID_PENALTY" -eq 0 ] && [ "$FIELD_PENALTY" -eq 0 ]; then
  NOTES="All expected schemas for pageType=$PAGE_TYPE are present. No structured-data action needed."
elif [ -n "$MISSING_EXPECTED" ]; then
  if [ -n "$PRESENT_FORBIDDEN" ]; then
    NOTES="Mixed: missing $MISSING_EXPECTED and forbidden present $PRESENT_FORBIDDEN for pageType=$PAGE_TYPE."
  else
    NOTES="Missing expected schemas for pageType=$PAGE_TYPE: $MISSING_EXPECTED. Add these to raise the score."
  fi
elif [ -n "$PRESENT_FORBIDDEN" ]; then
  NOTES="Forbidden schemas present for pageType=$PAGE_TYPE: $PRESENT_FORBIDDEN. Remove these (or re-classify the page type with --page-type)."
elif [ "$FIELD_PENALTY" -gt 0 ]; then
  NOTES="Schemas for pageType=$PAGE_TYPE are present but missing required fields. See violations for details."
elif [ "$VALID_PENALTY" -gt 0 ]; then
  NOTES="Score reduced by $VALID_PENALTY pts due to invalid JSON-LD blocks."
else
  NOTES="Structured data scored for pageType=$PAGE_TYPE."
fi

# Assemble the structuredData report object. The list strings are split into
# JSON arrays inside jq.
STRUCTURED_GRADE=$(grade_for "$STRUCTURED")
STRUCTURED_OBJ=$(
  jq -n \
    --argjson score "$STRUCTURED" \
    --arg grade "$STRUCTURED_GRADE" \
    --arg pageType "$PAGE_TYPE" \
    --arg expectedList "$RUBRIC_EXPECTED" \
    --arg optionalList "$RUBRIC_OPTIONAL" \
    --arg forbiddenList "$RUBRIC_FORBIDDEN" \
    --arg presentList "$PRESENT_TYPES" \
    --arg missingList "$MISSING_EXPECTED" \
    --arg extrasList "$EXTRAS" \
    --arg forbiddenPresent "$PRESENT_FORBIDDEN" \
    --argjson invalidCount "$JSONLD_INVALID" \
    --argjson validPenalty "$VALID_PENALTY" \
    --argjson fieldViolations "$FIELD_VIOLATIONS_JSON" \
    --arg calculation "$CALCULATION" \
    --arg notes "$NOTES" \
    '
    def to_arr: split(" ") | map(select(length > 0));
    {
      score: $score,
      grade: $grade,
      pageType: $pageType,
      expected: ($expectedList | to_arr),
      optional: ($optionalList | to_arr),
      forbidden: ($forbiddenList | to_arr),
      present: ($presentList | to_arr),
      missing: ($missingList | to_arr),
      extras: ($extrasList | to_arr),
      violations: (
        ($forbiddenPresent | to_arr | map({kind: "forbidden_schema", schema: ., impact: -10}))
        + (if $validPenalty > 0
           then [{kind: "invalid_jsonld", count: $invalidCount, impact: (0 - $validPenalty)}]
           else []
           end)
        + $fieldViolations
      ),
      calculation: $calculation,
      notes: $notes
    }
    '
)
|
|
507
|
+
|
|
508
|
+
# --- Category 4: Technical Signals (0-100) ---
# title 25, description 25, canonical 20, og:title 8, og:description 7, and
# sitemap 15 (10 when it exists but does not list the target URL).
TITLE=$(jget "$META" '.title' "")
DESCRIPTION=$(jget "$META" '.description' "")
CANONICAL=$(jget "$META" '.canonical' "")
OG_TITLE=$(jget "$META" '.og.title' "")
OG_DESC=$(jget "$META" '.og.description' "")
SITEMAP_EXISTS=$(jget_bool "$SITEMAP_FILE" '.exists')
SITEMAP_CONTAINS=$(jget_bool "$SITEMAP_FILE" '.containsTarget')

TECHNICAL=0
# A field counts as present when it is neither empty nor the string "null".
case "$TITLE" in
  "" | null) ;;
  *) TECHNICAL=$((TECHNICAL + 25)) ;;
esac
case "$DESCRIPTION" in
  "" | null) ;;
  *) TECHNICAL=$((TECHNICAL + 25)) ;;
esac
case "$CANONICAL" in
  "" | null) ;;
  *) TECHNICAL=$((TECHNICAL + 20)) ;;
esac
case "$OG_TITLE" in
  "" | null) ;;
  *) TECHNICAL=$((TECHNICAL + 8)) ;;
esac
case "$OG_DESC" in
  "" | null) ;;
  *) TECHNICAL=$((TECHNICAL + 7)) ;;
esac

if [ "$SITEMAP_EXISTS" = "true" ]; then
  if [ "$SITEMAP_CONTAINS" = "true" ]; then
    TECHNICAL=$((TECHNICAL + 15))
  else
    TECHNICAL=$((TECHNICAL + 10))
  fi
fi
|
|
529
|
+
|
|
530
|
+
# --- Category 5: AI Readiness (0-100) ---
# llms.txt: 40 for existing, plus 7 (title), 7 (description), 6 (>= 1 URL).
# Page content: 20 for >= 200 effective words, 20 for an h1 together with a
# meta description.
LLMS_EXISTS=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.exists')
LLMS_HAS_TITLE=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.hasTitle')
LLMS_HAS_DESC=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.hasDescription')
LLMS_URLS=$(jget_num "$LLMSTXT_FILE" '.llmsTxt.urlCount')

AI=0
if [ "$LLMS_EXISTS" = "true" ]; then
  AI=40
  if [ "$LLMS_HAS_TITLE" = "true" ]; then
    AI=$((AI + 7))
  fi
  if [ "$LLMS_HAS_DESC" = "true" ]; then
    AI=$((AI + 7))
  fi
  if [ "$LLMS_URLS" -ge 1 ]; then
    AI=$((AI + 6))
  fi
fi

if [ "$EFFECTIVE_WORD_COUNT" -ge 200 ]; then
  AI=$((AI + 20))
fi

if [ "$H1_COUNT" -ge 1 ]; then
  # The description must be neither empty nor the string "null".
  case "$DESCRIPTION" in
    "" | null) ;;
    *) AI=$((AI + 20)) ;;
  esac
fi
|
|
547
|
+
|
|
548
|
+
# Cap the additive category scores at 100 (STRUCTURED is already clamped).
if [ "$ACC" -gt 100 ]; then ACC=100; fi
if [ "$CONTENT" -gt 100 ]; then CONTENT=100; fi
if [ "$TECHNICAL" -gt 100 ]; then TECHNICAL=100; fi
if [ "$AI" -gt 100 ]; then AI=100; fi

# Per-bot composite: weighted mean of the five categories, rounded half up.
BOT_SCORE=$(awk \
  -v a="$ACC" -v c="$CONTENT" -v s="$STRUCTURED" -v t="$TECHNICAL" -v ai="$AI" \
  -v wa="$W_ACCESSIBILITY" -v wc="$W_CONTENT" -v ws="$W_STRUCTURED" -v wt="$W_TECHNICAL" -v wai="$W_AI" \
  'BEGIN { printf "%d", (a*wa + c*wc + s*ws + t*wt + ai*wai) / (wa+wc+ws+wt+wai) + 0.5 }')

ACC_GRADE=$(grade_for "$ACC")
CONTENT_GRADE=$(grade_for "$CONTENT")
TECHNICAL_GRADE=$(grade_for "$TECHNICAL")
AI_GRADE=$(grade_for "$AI")
BOT_GRADE=$(grade_for "$BOT_SCORE")

# Build this bot's report object.
BOT_OBJ=$(
  jq -n \
    --arg id "$bot_id" \
    --arg name "$BOT_NAME" \
    --arg rendersJs "$RENDERS_JS" \
    --argjson score "$BOT_SCORE" \
    --arg grade "$BOT_GRADE" \
    --argjson acc "$ACC" \
    --arg accGrade "$ACC_GRADE" \
    --argjson content "$CONTENT" \
    --arg contentGrade "$CONTENT_GRADE" \
    --argjson structured "$STRUCTURED_OBJ" \
    --argjson technical "$TECHNICAL" \
    --arg technicalGrade "$TECHNICAL_GRADE" \
    --argjson ai "$AI" \
    --arg aiGrade "$AI_GRADE" \
    --argjson serverWords "$SERVER_WORD_COUNT" \
    --argjson effectiveWords "$EFFECTIVE_WORD_COUNT" \
    --argjson missedWords "$MISSED_WORDS" \
    --argjson hydrationPenalty "$HYDRATION_PENALTY" \
    '{
      id: $id,
      name: $name,
      rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end),
      score: $score,
      grade: $grade,
      visibility: {
        serverWords: $serverWords,
        effectiveWords: $effectiveWords,
        missedWordsVsRendered: $missedWords,
        hydrationPenaltyPts: $hydrationPenalty
      },
      categories: {
        accessibility: { score: $acc, grade: $accGrade },
        contentVisibility: { score: $content, grade: $contentGrade },
        structuredData: $structured,
        technicalSignals: { score: $technical, grade: $technicalGrade },
        aiReadiness: { score: $ai, grade: $aiGrade }
      }
    }'
)

# Store the result under this bot's id.
BOTS_JSON=$(jq --argjson bot "$BOT_OBJ" --arg id "$bot_id" '.[$id] = $bot' <<<"$BOTS_JSON")

# Feed the cross-bot category averages.
CAT_N=$((CAT_N + 1))
CAT_ACCESSIBILITY_SUM=$((CAT_ACCESSIBILITY_SUM + ACC))
CAT_CONTENT_SUM=$((CAT_CONTENT_SUM + CONTENT))
CAT_STRUCTURED_SUM=$((CAT_STRUCTURED_SUM + STRUCTURED))
CAT_TECHNICAL_SUM=$((CAT_TECHNICAL_SUM + TECHNICAL))
CAT_AI_SUM=$((CAT_AI_SUM + AI))

# Only bots with a non-zero overall weight feed the overall score.
W=$(overall_weight "$bot_id")
if [ "$W" -gt 0 ]; then
  OVERALL_WEIGHTED_SUM=$((OVERALL_WEIGHTED_SUM + BOT_SCORE * W))
  OVERALL_WEIGHT_TOTAL=$((OVERALL_WEIGHT_TOTAL + W))
fi
|
|
617
|
+
done
|
|
618
|
+
|
|
619
|
+
# Cross-bot category averages (integer division, so results truncate) and
# their grades.
CAT_ACC_AVG=$((CAT_ACCESSIBILITY_SUM / CAT_N))
CAT_ACC_GRADE=$(grade_for "$CAT_ACC_AVG")

CAT_CONTENT_AVG=$((CAT_CONTENT_SUM / CAT_N))
CAT_CONTENT_GRADE=$(grade_for "$CAT_CONTENT_AVG")

CAT_STRUCTURED_AVG=$((CAT_STRUCTURED_SUM / CAT_N))
CAT_STRUCTURED_GRADE=$(grade_for "$CAT_STRUCTURED_AVG")

CAT_TECHNICAL_AVG=$((CAT_TECHNICAL_SUM / CAT_N))
CAT_TECHNICAL_GRADE=$(grade_for "$CAT_TECHNICAL_AVG")

CAT_AI_AVG=$((CAT_AI_SUM / CAT_N))
CAT_AI_GRADE=$(grade_for "$CAT_AI_AVG")

# Overall score: weighted mean of the per-bot scores. When no weighted bot
# was scored, fall back to the plain mean of the five category averages.
if [ "$OVERALL_WEIGHT_TOTAL" -le 0 ]; then
  OVERALL_SCORE=$(((CAT_ACC_AVG + CAT_CONTENT_AVG + CAT_STRUCTURED_AVG + CAT_TECHNICAL_AVG + CAT_AI_AVG) / 5))
else
  OVERALL_SCORE=$((OVERALL_WEIGHTED_SUM / OVERALL_WEIGHT_TOTAL))
fi
OVERALL_GRADE=$(grade_for "$OVERALL_SCORE")
|
|
637
|
+
|
|
638
|
+
# --- Cross-bot content parity (C4) ---
# Compare the smallest and largest server word counts among bots whose fetch
# succeeded. The score is min/max as a rounded percentage.
PARITY_MIN_WORDS=999999999
PARITY_MAX_WORDS=0
PARITY_BOT_COUNT=0
for bot_id in $BOTS; do
  FETCH="$RESULTS_DIR/fetch-$bot_id.json"
  P_FETCH_FAILED=$(jget_bool "$FETCH" '.fetchFailed')
  if [ "$P_FETCH_FAILED" = "true" ]; then
    continue
  fi
  WC=$(jget_num "$FETCH" '.wordCount')
  if [ "$WC" -lt "$PARITY_MIN_WORDS" ]; then
    PARITY_MIN_WORDS=$WC
  fi
  if [ "$WC" -gt "$PARITY_MAX_WORDS" ]; then
    PARITY_MAX_WORDS=$WC
  fi
  PARITY_BOT_COUNT=$((PARITY_BOT_COUNT + 1))
done

# With fewer than two comparable bots, or when nobody saw any words, parity
# is treated as perfect.
PARITY_SCORE=100
PARITY_MAX_DELTA=0
if [ "$PARITY_BOT_COUNT" -gt 1 ] && [ "$PARITY_MAX_WORDS" -gt 0 ]; then
  PARITY_SCORE=$(awk -v min="$PARITY_MIN_WORDS" -v max="$PARITY_MAX_WORDS" \
    'BEGIN { printf "%d", (min / max) * 100 + 0.5 }')
  PARITY_MAX_DELTA=$(awk -v min="$PARITY_MIN_WORDS" -v max="$PARITY_MAX_WORDS" \
    'BEGIN { printf "%d", ((max - min) / max) * 100 + 0.5 }')
fi

if [ "$PARITY_SCORE" -gt 100 ]; then
  PARITY_SCORE=100
fi
PARITY_GRADE=$(grade_for "$PARITY_SCORE")

# Plain-language reading of the parity score.
if [ "$PARITY_SCORE" -lt 50 ]; then
  PARITY_INTERP="Severe content divergence — site likely relies on client-side rendering. AI bots see significantly less content than Googlebot."
elif [ "$PARITY_SCORE" -lt 95 ]; then
  PARITY_INTERP="Moderate content divergence between bots — likely partial client-side rendering hydration."
else
  PARITY_INTERP="Content is consistent across all bots."
fi
|
|
675
|
+
|
|
676
|
+
# --- Warnings (H2) ---
# Emit a high-severity warning when no usable render diff was available.
# The reason comes from the diff file's .reason field ("skipped" when absent
# or unreadable), or is "not_found" when the file does not exist.
WARNINGS="[]"
case "$DIFF_AVAILABLE" in
  true) ;;
  *)
    if [ -f "$DIFF_RENDER_FILE" ]; then
      DIFF_REASON=$(jq -r '.reason // "skipped"' "$DIFF_RENDER_FILE" 2>/dev/null || echo "skipped")
    else
      DIFF_REASON="not_found"
    fi
    WARNINGS=$(jq --arg reason "$DIFF_REASON" \
      '. + [{
        code: "diff_render_unavailable",
        severity: "high",
        message: "JS rendering comparison was skipped. If this site uses CSR, non-JS bot scores may be inaccurate.",
        reason: $reason
      }]' <<<"$WARNINGS")
    ;;
esac
|
|
691
|
+
|
|
692
|
+
# Final report: one JSON document on stdout. The timestamp is UTC in
# ISO-8601 form, and the report "version" string is hard-coded to 0.2.0.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Arguments are grouped by report section; jq does not care about their order.
jq -n \
  --arg url "$TARGET_URL" \
  --arg timestamp "$TIMESTAMP" \
  --arg version "0.2.0" \
  --arg pageType "$PAGE_TYPE" \
  --arg pageTypeOverride "$PAGE_TYPE_OVERRIDE" \
  --argjson overallScore "$OVERALL_SCORE" \
  --arg overallGrade "$OVERALL_GRADE" \
  --argjson parityScore "$PARITY_SCORE" \
  --arg parityGrade "$PARITY_GRADE" \
  --argjson parityMinWords "$PARITY_MIN_WORDS" \
  --argjson parityMaxWords "$PARITY_MAX_WORDS" \
  --argjson parityMaxDelta "$PARITY_MAX_DELTA" \
  --arg parityInterp "$PARITY_INTERP" \
  --argjson warnings "$WARNINGS" \
  --argjson bots "$BOTS_JSON" \
  --argjson catAcc "$CAT_ACC_AVG" \
  --arg catAccGrade "$CAT_ACC_GRADE" \
  --argjson catContent "$CAT_CONTENT_AVG" \
  --arg catContentGrade "$CAT_CONTENT_GRADE" \
  --argjson catStructured "$CAT_STRUCTURED_AVG" \
  --arg catStructuredGrade "$CAT_STRUCTURED_GRADE" \
  --argjson catTechnical "$CAT_TECHNICAL_AVG" \
  --arg catTechnicalGrade "$CAT_TECHNICAL_GRADE" \
  --argjson catAi "$CAT_AI_AVG" \
  --arg catAiGrade "$CAT_AI_GRADE" \
  '{
    url: $url,
    timestamp: $timestamp,
    version: $version,
    pageType: $pageType,
    pageTypeOverridden: ($pageTypeOverride | length > 0),
    overall: { score: $overallScore, grade: $overallGrade },
    parity: {
      score: $parityScore,
      grade: $parityGrade,
      minWords: (if $parityMinWords >= 999999999 then 0 else $parityMinWords end),
      maxWords: $parityMaxWords,
      maxDeltaPct: $parityMaxDelta,
      interpretation: $parityInterp
    },
    warnings: $warnings,
    bots: $bots,
    categories: {
      accessibility: { score: $catAcc, grade: $catAccGrade },
      contentVisibility: { score: $catContent, grade: $catContentGrade },
      structuredData: { score: $catStructured, grade: $catStructuredGrade },
      technicalSignals: { score: $catTechnical, grade: $catTechnicalGrade },
      aiReadiness: { score: $catAi, grade: $catAiGrade }
    }
  }'
|