npm - @braedenbuilds/crawl-sim - Versions diffs - 1.0.5 → 1.2.0 - Mend

@braedenbuilds/crawl-sim 1.0.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/{scripts → skills/crawl-sim/scripts}/extract-jsonld.sh RENAMED Viewed

@@ -62,6 +62,7 @@ fi
 VALID_COUNT=0
 INVALID_COUNT=0
+BLOCKS_JSON="[]"
 if [ "$BLOCK_COUNT" -gt 0 ]; then
   while IFS= read -r block; do
@@ -79,6 +80,15 @@ if [ "$BLOCK_COUNT" -gt 0 ]; then
           else empty end;
         collect_types
       ' 2>/dev/null >> "$TYPES_FILE" || true
+      # Extract per-block type + top-level field names for field validation (AC-B1)
+      BLOCK_INFO=$(printf '%s' "$block" | jq -c '
+        {
+          type: (if has("@type") then (.["@type"] | if type == "array" then .[0] else . end) else "unknown" end),
+          fields: (keys | map(select(startswith("@") | not)))
+        }
+      ' 2>/dev/null || echo '{"type":"unknown","fields":[]}')
+      BLOCKS_JSON=$(printf '%s' "$BLOCKS_JSON" | jq --argjson b "$BLOCK_INFO" '. + [$b]')
     else
       INVALID_COUNT=$((INVALID_COUNT + 1))
     fi
@@ -109,6 +119,7 @@ jq -n \
   --argjson valid "$VALID_COUNT" \
   --argjson invalid "$INVALID_COUNT" \
   --argjson types "$TYPES_JSON" \
+  --argjson blocks "$BLOCKS_JSON" \
   --argjson hasOrg "$HAS_ORG" \
   --argjson hasBreadcrumb "$HAS_BREADCRUMB" \
   --argjson hasWebsite "$HAS_WEBSITE" \
@@ -121,6 +132,7 @@ jq -n \
     validCount: $valid,
     invalidCount: $invalid,
     types: $types,
+    blocks: $blocks,
     flags: {
       hasOrganization: $hasOrg,
       hasBreadcrumbList: $hasBreadcrumb,

package/skills/crawl-sim/scripts/fetch-as-bot.sh ADDED Viewed

@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# fetch-as-bot.sh — Fetch a URL as a specific bot User-Agent
+# Usage: fetch-as-bot.sh <url> <profile.json>
+# Output: a single JSON object on stdout; progress and errors go to stderr.
+# Requires: curl, jq, base64, and the sibling _lib.sh helper file.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=_lib.sh
+. "$SCRIPT_DIR/_lib.sh"
+# Both positional arguments are mandatory; ${N:?msg} aborts with the usage
+# text when one is missing or empty.
+URL="${1:?Usage: fetch-as-bot.sh <url> <profile.json>}"
+PROFILE="${2:?Usage: fetch-as-bot.sh <url> <profile.json>}"
+# Bot identity is read from the profile JSON. `jq -r` prints the literal
+# string "null" for a key that is absent, so these are never empty.
+BOT_ID=$(jq -r '.id' "$PROFILE")
+BOT_NAME=$(jq -r '.name' "$PROFILE")
+UA=$(jq -r '.userAgent' "$PROFILE")
+RENDERS_JS=$(jq -r '.rendersJavaScript' "$PROFILE")
+# Scratch files for the response headers, body and curl's stderr; the EXIT
+# trap removes them on every exit path.
+TMPDIR="${TMPDIR:-/tmp}"
+HEADERS_FILE=$(mktemp "$TMPDIR/crawlsim-headers.XXXXXX")
+BODY_FILE=$(mktemp "$TMPDIR/crawlsim-body.XXXXXX")
+CURL_STDERR_FILE=$(mktemp "$TMPDIR/crawlsim-stderr.XXXXXX")
+trap 'rm -f "$HEADERS_FILE" "$BODY_FILE" "$CURL_STDERR_FILE"' EXIT
+printf '[%s] fetching %s\n' "$BOT_ID" "$URL" >&2
+# Run curl with errexit suspended so a transport failure can be reported as
+# JSON instead of aborting the script. Headers are dumped to $HEADERS_FILE,
+# the body to $BODY_FILE and curl's own stderr to $CURL_STDERR_FILE; the -w
+# template makes curl print a JSON summary of the transfer, captured in TIMING.
+set +e
+TIMING=$(curl -sS -L \
+  -H "User-Agent: $UA" \
+  -D "$HEADERS_FILE" \
+  -o "$BODY_FILE" \
+  -w '{"total":%{time_total},"ttfb":%{time_starttransfer},"connect":%{time_connect},"statusCode":%{http_code},"sizeDownload":%{size_download},"redirectCount":%{num_redirects},"finalUrl":"%{url_effective}"}' \
+  --max-time 30 \
+  "$URL" 2>"$CURL_STDERR_FILE")
+CURL_EXIT=$?
+set -e
+CURL_ERR=""
+if [ -s "$CURL_STDERR_FILE" ]; then
+  CURL_ERR=$(cat "$CURL_STDERR_FILE")
+fi
+# Non-zero curl exit: log the failure, emit a placeholder result flagged
+# fetchFailed with zeroed metrics, then exit 0.
+# NOTE(review): exit 0 presumably lets callers still parse the JSON result —
+# confirm against the scripts that consume this output.
+if [ "$CURL_EXIT" -ne 0 ]; then
+  printf '[%s] FAILED: curl exit %d — %s\n' "$BOT_ID" "$CURL_EXIT" "$CURL_ERR" >&2
+  jq -n \
+    --arg url "$URL" \
+    --arg botId "$BOT_ID" \
+    --arg botName "$BOT_NAME" \
+    --arg ua "$UA" \
+    --arg rendersJs "$RENDERS_JS" \
+    --arg error "$CURL_ERR" \
+    --argjson exitCode "$CURL_EXIT" \
+    '{
+      url: $url,
+      bot: {
+        id: $botId,
+        name: $botName,
+        userAgent: $ua,
+        rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end)
+      },
+      fetchFailed: true,
+      error: $error,
+      curlExitCode: $exitCode,
+      status: 0,
+      timing: { total: 0, ttfb: 0 },
+      size: 0,
+      wordCount: 0,
+      headers: {},
+      bodyBase64: ""
+    }'
+  exit 0
+fi
+# Split curl's -w JSON summary into shell variables: jq renders the selected
+# fields as one tab-separated line and `read` assigns them in order. FINAL_URL
+# is listed last, so it receives whatever remains of the line.
+read -r STATUS TOTAL_TIME TTFB SIZE REDIRECT_COUNT FINAL_URL <<< \
+  "$(echo "$TIMING" | jq -r '[.statusCode, .total, .ttfb, .sizeDownload, .redirectCount, .finalUrl] | @tsv')"
+# Parse response headers into a JSON object using jq for safe escaping.
+# curl -L writes multiple blocks on redirect; jq keeps the last definition
+# of each header since `add` overwrites left-to-right.
+# grep exits 1 when no line looks like a header (e.g. a response that has
+# only a status line). Under `set -euo pipefail` that status would fail the
+# whole pipeline and abort the script before any JSON is emitted, so exit
+# status 1 (no match) is tolerated here while real grep errors (status 2)
+# still propagate. With empty input jq's `add // {}` yields {}.
+HEADERS_JSON=$(tr -d '\r' < "$HEADERS_FILE" \
+  | { grep -E '^[A-Za-z][A-Za-z0-9-]*:[[:space:]]' || [ "$?" -eq 1 ]; } \
+  | jq -Rs '
+      split("\n")
+      | map(select(length > 0))
+      | map(capture("^(?<k>[^:]+):[[:space:]]*(?<v>.*)$"))
+      | map({(.k): .v})
+      | add // {}
+    ')
+# Parse redirect chain from headers dump.
+# curl -D writes multiple HTTP response blocks on redirect — each starts with HTTP/.
+# awk remembers the status code of the current block and the second field of
+# its Location header, and prints "<status> <location>" at the blank line that
+# ends the block; blocks without a Location header print nothing. jq then
+# turns those lines into [{hop, status, location}, ...] with hop counted
+# from 0.
+REDIRECT_CHAIN="[]"
+if [ "$REDIRECT_COUNT" -gt 0 ]; then
+  REDIRECT_CHAIN=$(tr -d '\r' < "$HEADERS_FILE" | awk '
+    /^HTTP\// { status=$2; url="" }
+    /^[Ll]ocation:/ { url=$2 }
+    /^$/ && status && url { printf "%s %s\n", status, url; status=""; url="" }
+  ' | jq -Rs '
+    split("\n") | map(select(length > 0)) |
+    to_entries | map({
+      hop: .key,
+      status: (.value | split(" ")[0] | tonumber),
+      location: (.value | split(" ")[1:] | join(" "))
+    })
+  ')
+fi
+# Word count of the downloaded body. count_words is not defined in this
+# script — presumably it comes from the sourced _lib.sh (confirm there).
+# An empty result is normalised to 0 so it can be passed to jq as a number.
+WORD_COUNT=$(count_words "$BODY_FILE")
+[ -z "$WORD_COUNT" ] && WORD_COUNT=0
+# The body is carried in the output JSON as base64; an empty body stays "".
+BODY_B64=""
+if [ -s "$BODY_FILE" ]; then
+  BODY_B64=$(base64 < "$BODY_FILE")
+fi
+printf '[%s] ok: status=%s size=%s words=%s time=%ss\n' "$BOT_ID" "$STATUS" "$SIZE" "$WORD_COUNT" "$TOTAL_TIME" >&2
+# Emit the success result on stdout. Values passed with --argjson must already
+# be valid JSON (numbers, the headers object, the redirect array); values
+# passed with --arg are inserted as JSON strings. rendersJavaScript is turned
+# back into a real boolean when the profile value was "true" or "false";
+# any other value is passed through as a string.
+jq -n \
+  --arg url "$URL" \
+  --arg botId "$BOT_ID" \
+  --arg botName "$BOT_NAME" \
+  --arg ua "$UA" \
+  --arg rendersJs "$RENDERS_JS" \
+  --argjson status "$STATUS" \
+  --argjson totalTime "$TOTAL_TIME" \
+  --argjson ttfb "$TTFB" \
+  --argjson size "$SIZE" \
+  --argjson wordCount "$WORD_COUNT" \
+  --argjson headers "$HEADERS_JSON" \
+  --argjson redirectCount "$REDIRECT_COUNT" \
+  --arg finalUrl "$FINAL_URL" \
+  --argjson redirectChain "$REDIRECT_CHAIN" \
+  --arg bodyBase64 "$BODY_B64" \
+  '{
+    url: $url,
+    bot: {
+      id: $botId,
+      name: $botName,
+      userAgent: $ua,
+      rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end)
+    },
+    status: $status,
+    timing: { total: $totalTime, ttfb: $ttfb },
+    size: $size,
+    wordCount: $wordCount,
+    redirectCount: $redirectCount,
+    finalUrl: $finalUrl,
+    redirectChain: $redirectChain,
+    headers: $headers,
+    bodyBase64: $bodyBase64
+  }'

package/skills/crawl-sim/scripts/schema-fields.sh ADDED Viewed

@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# schema-fields.sh — Required field definitions per schema.org type.
+# Source this file, then call required_fields_for <SchemaType>.
+# required_fields_for <SchemaType>
+# Prints, on one line, the space-separated field names required for the
+# given schema.org type. Types without an entry print an empty line.
+required_fields_for() {
+  local schema_type="$1"
+  case "$schema_type" in
+    Organization | WebSite)
+      echo "name url"
+      ;;
+    Article | NewsArticle)
+      echo "headline author datePublished"
+      ;;
+    FAQPage)
+      echo "mainEntity"
+      ;;
+    BreadcrumbList | ItemList)
+      echo "itemListElement"
+      ;;
+    CollectionPage | AboutPage | ContactPage | Product | ProfessionalService | Person)
+      echo "name"
+      ;;
+    LocalBusiness)
+      echo "name address"
+      ;;
+    ImageObject)
+      echo "contentUrl"
+      ;;
+    PostalAddress)
+      echo "streetAddress"
+      ;;
+    *)
+      echo ""
+      ;;
+  esac
+}

package/scripts/compute-score.sh DELETED Viewed

@@ -1,424 +0,0 @@
-#!/usr/bin/env bash
-set -eu
-# compute-score.sh — Aggregate check outputs into per-bot + per-category scores
-# Usage: compute-score.sh <results-dir>
-# Output: JSON to stdout
-#
-# Expected filenames in <results-dir>:
-#   fetch-<bot_id>.json      — fetch-as-bot.sh output
-#   meta-<bot_id>.json       — extract-meta.sh output
-#   jsonld-<bot_id>.json     — extract-jsonld.sh output
-#   links-<bot_id>.json      — extract-links.sh output
-#   robots-<bot_id>.json     — check-robots.sh output
-#   llmstxt.json             — check-llmstxt.sh output (bot-independent)
-#   sitemap.json             — check-sitemap.sh output (bot-independent)
-#   diff-render.json         — diff-render.sh output (optional, Googlebot only)
-# The results directory is mandatory; ${1:?...} aborts with the usage text
-# when it is missing, and a non-directory path is rejected explicitly below.
-RESULTS_DIR="${1:?Usage: compute-score.sh <results-dir>}"
-printf '[compute-score] aggregating %s\n' "$RESULTS_DIR" >&2
-if [ ! -d "$RESULTS_DIR" ]; then
-  echo "Error: results dir not found: $RESULTS_DIR" >&2
-  exit 1
-fi
-# Category weights (as percentages of per-bot composite)
-# 25 + 30 + 20 + 15 + 10 = 100; the composite formula further down divides by
-# the sum of these weights.
-W_ACCESSIBILITY=25
-W_CONTENT=30
-W_STRUCTURED=20
-W_TECHNICAL=15
-W_AI=10
-# Overall composite weights (per bot)
-# Default: Googlebot 40, GPTBot 20, ClaudeBot 20, PerplexityBot 20
-# overall_weight <bot_id>
-# Prints the bot's weight in the overall composite. Ids are matched exactly
-# (lower-case); any other id prints 0, which the caller treats as "leave this
-# bot out of the weighted overall score".
-overall_weight() {
-  case "$1" in
-    googlebot) echo 40 ;;
-    gptbot) echo 20 ;;
-    claudebot) echo 20 ;;
-    perplexitybot) echo 20 ;;
-    *) echo 0 ;;
-  esac
-}
-# Grade from score (0-100)
-# grade_for <score>
-# Prints the letter grade for an integer score: the first threshold the score
-# reaches wins (93 A, 90 A-, 87 B+, ... 60 D-), anything below 60 is F.
-# The comparisons use `[ -ge ]`, so the argument must be an integer.
-grade_for() {
-  local s=$1
-  if   [ "$s" -ge 93 ]; then echo "A"
-  elif [ "$s" -ge 90 ]; then echo "A-"
-  elif [ "$s" -ge 87 ]; then echo "B+"
-  elif [ "$s" -ge 83 ]; then echo "B"
-  elif [ "$s" -ge 80 ]; then echo "B-"
-  elif [ "$s" -ge 77 ]; then echo "C+"
-  elif [ "$s" -ge 73 ]; then echo "C"
-  elif [ "$s" -ge 70 ]; then echo "C-"
-  elif [ "$s" -ge 67 ]; then echo "D+"
-  elif [ "$s" -ge 63 ]; then echo "D"
-  elif [ "$s" -ge 60 ]; then echo "D-"
-  else echo "F"
-  fi
-}
-# Read a jq value from a file with a default fallback
-# jget <file> <jq-query> [default]
-# Prints the query result as raw text. The default (the string "null" when
-# omitted) is printed instead when the file does not exist, when jq fails, or
-# when the query yields null or false — jq's `//` operator treats both as
-# "no value".
-jget() {
-  local file="$1"
-  local query="$2"
-  local default="${3:-null}"
-  if [ -f "$file" ]; then
-    jq -r --arg d "$default" "$query // \$d" "$file" 2>/dev/null || echo "$default"
-  else
-    echo "$default"
-  fi
-}
-# jget_num <file> <jq-query>
-# Like jget, but guarantees a numeric result: anything that is not an
-# optionally negative integer or decimal (including "null") becomes 0.
-jget_num() {
-  local v
-  v=$(jget "$1" "$2" "0")
-  # Replace "null" or non-numeric with 0
-  if ! printf '%s' "$v" | grep -qE '^-?[0-9]+(\.[0-9]+)?$'; then
-    echo "0"
-  else
-    echo "$v"
-  fi
-}
-# jget_bool <file> <jq-query>
-# Prints "true" only when the query result is exactly true; a missing file,
-# a missing key, false, or any other value all print "false".
-jget_bool() {
-  local v
-  v=$(jget "$1" "$2" "false")
-  if [ "$v" = "true" ]; then echo "true"; else echo "false"; fi
-}
-# Discover the bots to score from the fetch-<bot_id>.json files present.
-# When the glob matches nothing it stays literal, so the -f test skips it.
-BOTS=""
-for f in "$RESULTS_DIR"/fetch-*.json; do
-  [ -f "$f" ] || continue
-  bot_id=$(basename "$f" .json | sed 's/^fetch-//')
-  BOTS="$BOTS $bot_id"
-done
-if [ -z "$BOTS" ]; then
-  echo "Error: no fetch-*.json files found in $RESULTS_DIR" >&2
-  exit 1
-fi
-LLMSTXT_FILE="$RESULTS_DIR/llmstxt.json"
-SITEMAP_FILE="$RESULTS_DIR/sitemap.json"
-DIFF_RENDER_FILE="$RESULTS_DIR/diff-render.json"
-# Load Playwright render-delta data once (used to differentiate JS-rendering
-# bots from non-rendering ones). If the comparison was skipped or missing,
-# all bots score against server HTML only.
-DIFF_AVAILABLE=false
-DIFF_RENDERED_WORDS=0
-DIFF_DELTA_PCT=0
-if [ -f "$DIFF_RENDER_FILE" ]; then
-  # Explicit null check — `.skipped // true` would treat real false as null.
-  DIFF_SKIPPED=$(jq -r '.skipped | if . == null then "true" else tostring end' "$DIFF_RENDER_FILE" 2>/dev/null || echo "true")
-  if [ "$DIFF_SKIPPED" = "false" ]; then
-    DIFF_AVAILABLE=true
-    DIFF_RENDERED_WORDS=$(jq -r '.renderedWordCount // 0' "$DIFF_RENDER_FILE")
-    DIFF_DELTA_PCT=$(jq -r '.deltaPct // 0' "$DIFF_RENDER_FILE")
-  fi
-fi
-# Per-bot result objects, keyed by bot id, accumulated as a JSON string.
-BOTS_JSON="{}"
-# Accumulators for per-category averages (across bots)
-CAT_ACCESSIBILITY_SUM=0
-CAT_CONTENT_SUM=0
-CAT_STRUCTURED_SUM=0
-CAT_TECHNICAL_SUM=0
-CAT_AI_SUM=0
-CAT_N=0
-# Accumulators for overall weighted composite
-OVERALL_WEIGHTED_SUM=0
-OVERALL_WEIGHT_TOTAL=0
-# Score every discovered bot: compute five category scores (0-100 each),
-# combine them into a weighted per-bot composite, append the bot's result
-# object to BOTS_JSON and feed the cross-bot accumulators.
-# $BOTS is deliberately unquoted so it splits into one id per iteration.
-for bot_id in $BOTS; do
-  FETCH="$RESULTS_DIR/fetch-$bot_id.json"
-  META="$RESULTS_DIR/meta-$bot_id.json"
-  JSONLD="$RESULTS_DIR/jsonld-$bot_id.json"
-  LINKS="$RESULTS_DIR/links-$bot_id.json"
-  ROBOTS="$RESULTS_DIR/robots-$bot_id.json"
-  # The jget* helpers fall back to their defaults when a file is missing, so
-  # absent per-bot result files simply contribute zero/false values below.
-  BOT_NAME=$(jget "$FETCH" '.bot.name' "$bot_id")
-  STATUS=$(jget_num "$FETCH" '.status')
-  TOTAL_TIME=$(jget_num "$FETCH" '.timing.total')
-  SERVER_WORD_COUNT=$(jget_num "$FETCH" '.wordCount')
-  # Read with explicit null fallback — jq's `//` is unsafe here because it
-  # treats boolean false as falsy, which is exactly the value we need to see.
-  RENDERS_JS=$(jq -r '.bot.rendersJavaScript | if . == null then "unknown" else tostring end' "$FETCH" 2>/dev/null || echo "unknown")
-  ROBOTS_ALLOWED=$(jget_bool "$ROBOTS" '.allowed')
-  # Effective word count depends on JS rendering capability:
-  # - true (e.g. Googlebot) + diff-render data → rendered DOM word count
-  # - false (AI training/search bots, observed) → server HTML only, with
-  #   penalty proportional to the rendering delta
-  # - unknown → conservative: server HTML (same as false but no penalty)
-  EFFECTIVE_WORD_COUNT=$SERVER_WORD_COUNT
-  HYDRATION_PENALTY=0
-  MISSED_WORDS=0
-  if [ "$DIFF_AVAILABLE" = "true" ]; then
-    if [ "$RENDERS_JS" = "true" ]; then
-      EFFECTIVE_WORD_COUNT=$DIFF_RENDERED_WORDS
-    elif [ "$RENDERS_JS" = "false" ]; then
-      # Absolute-value delta: if rendered DOM has materially more than server,
-      # AI bots are missing that content.
-      # The "+ 0.5" before %d rounds the absolute delta to the nearest integer.
-      ABS_DELTA=$(awk -v d="$DIFF_DELTA_PCT" 'BEGIN { printf "%d", (d < 0 ? -d : d) + 0.5 }')
-      if [ "$ABS_DELTA" -gt 5 ]; then
-        # Scale penalty: 5% delta = 0, 10% = 5, 20%+ = 15 (cap)
-        HYDRATION_PENALTY=$(awk -v d="$ABS_DELTA" 'BEGIN {
-          p = (d - 5)
-          if (p > 15) p = 15
-          printf "%d", p
-        }')
-      fi
-      MISSED_WORDS=$((DIFF_RENDERED_WORDS - SERVER_WORD_COUNT))
-      [ "$MISSED_WORDS" -lt 0 ] && MISSED_WORDS=0
-    fi
-  fi
-  # --- Category 1: Accessibility (0-100) ---
-  ACC=0
-  # robots.txt allows: 40
-  [ "$ROBOTS_ALLOWED" = "true" ] && ACC=$((ACC + 40))
-  # HTTP 200: 40
-  [ "$STATUS" = "200" ] && ACC=$((ACC + 40))
-  # Response time: <2s = 20, <5s = 10, else 0
-  TIME_SCORE=$(awk -v t="$TOTAL_TIME" 'BEGIN { if (t < 2) print 20; else if (t < 5) print 10; else print 0 }')
-  ACC=$((ACC + TIME_SCORE))
-  # --- Category 2: Content Visibility (0-100) ---
-  # Word-count tier (up to 30) + H1 (20) + H2 (15) + internal links (up to 20)
-  # + image alt coverage (up to 15), minus the hydration penalty.
-  CONTENT=0
-  if [ "$EFFECTIVE_WORD_COUNT" -ge 300 ]; then CONTENT=$((CONTENT + 30))
-  elif [ "$EFFECTIVE_WORD_COUNT" -ge 150 ]; then CONTENT=$((CONTENT + 20))
-  elif [ "$EFFECTIVE_WORD_COUNT" -ge 50 ]; then CONTENT=$((CONTENT + 10))
-  fi
-  H1_COUNT=$(jget_num "$META" '.headings.h1.count')
-  H2_COUNT=$(jget_num "$META" '.headings.h2.count')
-  [ "$H1_COUNT" -ge 1 ] && CONTENT=$((CONTENT + 20))
-  [ "$H2_COUNT" -ge 1 ] && CONTENT=$((CONTENT + 15))
-  INTERNAL_LINKS=$(jget_num "$LINKS" '.counts.internal')
-  if [ "$INTERNAL_LINKS" -ge 5 ]; then CONTENT=$((CONTENT + 20))
-  elif [ "$INTERNAL_LINKS" -ge 1 ]; then CONTENT=$((CONTENT + 10))
-  fi
-  IMG_TOTAL=$(jget_num "$META" '.images.total')
-  IMG_WITH_ALT=$(jget_num "$META" '.images.withAlt')
-  # A page without images gets the full alt-text share; otherwise the share
-  # is proportional to the fraction of images with alt text, truncated by %d.
-  if [ "$IMG_TOTAL" -eq 0 ]; then
-    CONTENT=$((CONTENT + 15))
-  else
-    ALT_SCORE=$(awk -v a="$IMG_WITH_ALT" -v t="$IMG_TOTAL" 'BEGIN { printf "%d", (a / t) * 15 }')
-    CONTENT=$((CONTENT + ALT_SCORE))
-  fi
-  # Apply hydration penalty for non-rendering bots that are missing content
-  CONTENT=$((CONTENT - HYDRATION_PENALTY))
-  [ $CONTENT -lt 0 ] && CONTENT=0
-  # --- Category 3: Structured Data (0-100) ---
-  # Any JSON-LD block (30) + no invalid blocks (20) + Organization/WebSite (20)
-  # + BreadcrumbList (15) + Article/Product/FAQPage (15).
-  STRUCTURED=0
-  JSONLD_COUNT=$(jget_num "$JSONLD" '.blockCount')
-  JSONLD_VALID=$(jget_num "$JSONLD" '.validCount')
-  JSONLD_INVALID=$(jget_num "$JSONLD" '.invalidCount')
-  HAS_ORG=$(jget_bool "$JSONLD" '.flags.hasOrganization')
-  HAS_WEBSITE=$(jget_bool "$JSONLD" '.flags.hasWebSite')
-  HAS_BREADCRUMB=$(jget_bool "$JSONLD" '.flags.hasBreadcrumbList')
-  HAS_ARTICLE=$(jget_bool "$JSONLD" '.flags.hasArticle')
-  HAS_PRODUCT=$(jget_bool "$JSONLD" '.flags.hasProduct')
-  HAS_FAQ=$(jget_bool "$JSONLD" '.flags.hasFAQPage')
-  [ "$JSONLD_COUNT" -ge 1 ] && STRUCTURED=$((STRUCTURED + 30))
-  if [ "$JSONLD_COUNT" -ge 1 ] && [ "$JSONLD_INVALID" -eq 0 ]; then
-    STRUCTURED=$((STRUCTURED + 20))
-  fi
-  if [ "$HAS_ORG" = "true" ] || [ "$HAS_WEBSITE" = "true" ]; then
-    STRUCTURED=$((STRUCTURED + 20))
-  fi
-  [ "$HAS_BREADCRUMB" = "true" ] && STRUCTURED=$((STRUCTURED + 15))
-  if [ "$HAS_ARTICLE" = "true" ] || [ "$HAS_PRODUCT" = "true" ] || [ "$HAS_FAQ" = "true" ]; then
-    STRUCTURED=$((STRUCTURED + 15))
-  fi
-  # --- Category 4: Technical Signals (0-100) ---
-  # Title (25) + description (25) + canonical (20) + og:title (8)
-  # + og:description (7) + sitemap (15 if it lists the target, else 10).
-  TECHNICAL=0
-  TITLE=$(jget "$META" '.title' "")
-  DESCRIPTION=$(jget "$META" '.description' "")
-  CANONICAL=$(jget "$META" '.canonical' "")
-  OG_TITLE=$(jget "$META" '.og.title' "")
-  OG_DESC=$(jget "$META" '.og.description' "")
-  [ -n "$TITLE" ] && [ "$TITLE" != "null" ] && TECHNICAL=$((TECHNICAL + 25))
-  [ -n "$DESCRIPTION" ] && [ "$DESCRIPTION" != "null" ] && TECHNICAL=$((TECHNICAL + 25))
-  [ -n "$CANONICAL" ] && [ "$CANONICAL" != "null" ] && TECHNICAL=$((TECHNICAL + 20))
-  if [ -n "$OG_TITLE" ] && [ "$OG_TITLE" != "null" ]; then TECHNICAL=$((TECHNICAL + 8)); fi
-  if [ -n "$OG_DESC" ] && [ "$OG_DESC" != "null" ]; then TECHNICAL=$((TECHNICAL + 7)); fi
-  SITEMAP_EXISTS=$(jget_bool "$SITEMAP_FILE" '.exists')
-  SITEMAP_CONTAINS=$(jget_bool "$SITEMAP_FILE" '.containsTarget')
-  if [ "$SITEMAP_EXISTS" = "true" ] && [ "$SITEMAP_CONTAINS" = "true" ]; then
-    TECHNICAL=$((TECHNICAL + 15))
-  elif [ "$SITEMAP_EXISTS" = "true" ]; then
-    TECHNICAL=$((TECHNICAL + 10))
-  fi
-  # --- Category 5: AI Readiness (0-100) ---
-  # llms.txt present (40, plus 7/7/6 for title/description/URLs)
-  # + citable content (20) + semantic clarity (20).
-  AI=0
-  LLMS_EXISTS=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.exists')
-  LLMS_HAS_TITLE=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.hasTitle')
-  LLMS_HAS_DESC=$(jget_bool "$LLMSTXT_FILE" '.llmsTxt.hasDescription')
-  LLMS_URLS=$(jget_num "$LLMSTXT_FILE" '.llmsTxt.urlCount')
-  if [ "$LLMS_EXISTS" = "true" ]; then
-    AI=$((AI + 40))
-    [ "$LLMS_HAS_TITLE" = "true" ] && AI=$((AI + 7))
-    [ "$LLMS_HAS_DESC" = "true" ] && AI=$((AI + 7))
-    [ "$LLMS_URLS" -ge 1 ] && AI=$((AI + 6))
-  fi
-  # Content citable (>= 200 words, effective for this bot)
-  [ "$EFFECTIVE_WORD_COUNT" -ge 200 ] && AI=$((AI + 20))
-  # Semantic clarity: has H1 + description
-  if [ "$H1_COUNT" -ge 1 ] && [ -n "$DESCRIPTION" ] && [ "$DESCRIPTION" != "null" ]; then
-    AI=$((AI + 20))
-  fi
-  # Cap categories at 100
-  [ $ACC -gt 100 ] && ACC=100
-  [ $CONTENT -gt 100 ] && CONTENT=100
-  [ $STRUCTURED -gt 100 ] && STRUCTURED=100
-  [ $TECHNICAL -gt 100 ] && TECHNICAL=100
-  [ $AI -gt 100 ] && AI=100
-  # Per-bot composite score (weighted average of 5 categories)
-  # The "+ 0.5" before %d rounds the weighted average to the nearest integer.
-  BOT_SCORE=$(awk -v a=$ACC -v c=$CONTENT -v s=$STRUCTURED -v t=$TECHNICAL -v ai=$AI \
-    -v wa=$W_ACCESSIBILITY -v wc=$W_CONTENT -v ws=$W_STRUCTURED -v wt=$W_TECHNICAL -v wai=$W_AI \
-    'BEGIN { printf "%d", (a*wa + c*wc + s*ws + t*wt + ai*wai) / (wa+wc+ws+wt+wai) + 0.5 }')
-  BOT_GRADE=$(grade_for "$BOT_SCORE")
-  ACC_GRADE=$(grade_for "$ACC")
-  CONTENT_GRADE=$(grade_for "$CONTENT")
-  STRUCTURED_GRADE=$(grade_for "$STRUCTURED")
-  TECHNICAL_GRADE=$(grade_for "$TECHNICAL")
-  AI_GRADE=$(grade_for "$AI")
-  # Build this bot's result object; rendersJavaScript becomes a real boolean
-  # for "true"/"false" and stays a string (e.g. "unknown") otherwise.
-  BOT_OBJ=$(jq -n \
-    --arg id "$bot_id" \
-    --arg name "$BOT_NAME" \
-    --arg rendersJs "$RENDERS_JS" \
-    --argjson score "$BOT_SCORE" \
-    --arg grade "$BOT_GRADE" \
-    --argjson acc "$ACC" \
-    --arg accGrade "$ACC_GRADE" \
-    --argjson content "$CONTENT" \
-    --arg contentGrade "$CONTENT_GRADE" \
-    --argjson structured "$STRUCTURED" \
-    --arg structuredGrade "$STRUCTURED_GRADE" \
-    --argjson technical "$TECHNICAL" \
-    --arg technicalGrade "$TECHNICAL_GRADE" \
-    --argjson ai "$AI" \
-    --arg aiGrade "$AI_GRADE" \
-    --argjson serverWords "$SERVER_WORD_COUNT" \
-    --argjson effectiveWords "$EFFECTIVE_WORD_COUNT" \
-    --argjson missedWords "$MISSED_WORDS" \
-    --argjson hydrationPenalty "$HYDRATION_PENALTY" \
-    '{
-      id: $id,
-      name: $name,
-      rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end),
-      score: $score,
-      grade: $grade,
-      visibility: {
-        serverWords: $serverWords,
-        effectiveWords: $effectiveWords,
-        missedWordsVsRendered: $missedWords,
-        hydrationPenaltyPts: $hydrationPenalty
-      },
-      categories: {
-        accessibility:     { score: $acc,        grade: $accGrade },
-        contentVisibility: { score: $content,    grade: $contentGrade },
-        structuredData:    { score: $structured, grade: $structuredGrade },
-        technicalSignals:  { score: $technical,  grade: $technicalGrade },
-        aiReadiness:       { score: $ai,         grade: $aiGrade }
-      }
-    }')
-  BOTS_JSON=$(printf '%s' "$BOTS_JSON" | jq --argjson bot "$BOT_OBJ" --arg id "$bot_id" '.[$id] = $bot')
-  # Accumulate category averages
-  CAT_ACCESSIBILITY_SUM=$((CAT_ACCESSIBILITY_SUM + ACC))
-  CAT_CONTENT_SUM=$((CAT_CONTENT_SUM + CONTENT))
-  CAT_STRUCTURED_SUM=$((CAT_STRUCTURED_SUM + STRUCTURED))
-  CAT_TECHNICAL_SUM=$((CAT_TECHNICAL_SUM + TECHNICAL))
-  CAT_AI_SUM=$((CAT_AI_SUM + AI))
-  CAT_N=$((CAT_N + 1))
-  # Accumulate weighted overall
-  # Bots whose overall_weight is 0 are left out of the weighted overall score
-  # but still count toward the category averages above.
-  W=$(overall_weight "$bot_id")
-  if [ "$W" -gt 0 ]; then
-    OVERALL_WEIGHTED_SUM=$((OVERALL_WEIGHTED_SUM + BOT_SCORE * W))
-    OVERALL_WEIGHT_TOTAL=$((OVERALL_WEIGHT_TOTAL + W))
-  fi
-done
-# Per-category averages (across all bots)
-# Shell integer division: the averages are truncated, not rounded. CAT_N is
-# at least 1 here because the script exits earlier when no bots are found.
-CAT_ACC_AVG=$((CAT_ACCESSIBILITY_SUM / CAT_N))
-CAT_CONTENT_AVG=$((CAT_CONTENT_SUM / CAT_N))
-CAT_STRUCTURED_AVG=$((CAT_STRUCTURED_SUM / CAT_N))
-CAT_TECHNICAL_AVG=$((CAT_TECHNICAL_SUM / CAT_N))
-CAT_AI_AVG=$((CAT_AI_SUM / CAT_N))
-# Overall composite
-# Weighted mean of the per-bot scores over the bots that have a weight; this
-# is also integer division, so it truncates (unlike the rounded per-bot score).
-if [ "$OVERALL_WEIGHT_TOTAL" -gt 0 ]; then
-  OVERALL_SCORE=$((OVERALL_WEIGHTED_SUM / OVERALL_WEIGHT_TOTAL))
-else
-  # Fall back to simple average if none of the 4 standard bots are present
-  OVERALL_SCORE=$(((CAT_ACC_AVG + CAT_CONTENT_AVG + CAT_STRUCTURED_AVG + CAT_TECHNICAL_AVG + CAT_AI_AVG) / 5))
-fi
-OVERALL_GRADE=$(grade_for "$OVERALL_SCORE")
-CAT_ACC_GRADE=$(grade_for "$CAT_ACC_AVG")
-CAT_CONTENT_GRADE=$(grade_for "$CAT_CONTENT_AVG")
-CAT_STRUCTURED_GRADE=$(grade_for "$CAT_STRUCTURED_AVG")
-CAT_TECHNICAL_GRADE=$(grade_for "$CAT_TECHNICAL_AVG")
-CAT_AI_GRADE=$(grade_for "$CAT_AI_AVG")
-# Get the URL from the first fetch file
-# "First" means first in `ls` output order; the URL is read from that file's
-# .url field and is empty when the field is absent.
-FIRST_FETCH=$(ls "$RESULTS_DIR"/fetch-*.json | head -1)
-TARGET_URL=$(jget "$FIRST_FETCH" '.url' "")
-TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
-# Emit the final report on stdout. The version string is hard-coded here.
-jq -n \
-  --arg url "$TARGET_URL" \
-  --arg timestamp "$TIMESTAMP" \
-  --arg version "0.1.0" \
-  --argjson overallScore "$OVERALL_SCORE" \
-  --arg overallGrade "$OVERALL_GRADE" \
-  --argjson bots "$BOTS_JSON" \
-  --argjson catAcc "$CAT_ACC_AVG" \
-  --arg catAccGrade "$CAT_ACC_GRADE" \
-  --argjson catContent "$CAT_CONTENT_AVG" \
-  --arg catContentGrade "$CAT_CONTENT_GRADE" \
-  --argjson catStructured "$CAT_STRUCTURED_AVG" \
-  --arg catStructuredGrade "$CAT_STRUCTURED_GRADE" \
-  --argjson catTechnical "$CAT_TECHNICAL_AVG" \
-  --arg catTechnicalGrade "$CAT_TECHNICAL_GRADE" \
-  --argjson catAi "$CAT_AI_AVG" \
-  --arg catAiGrade "$CAT_AI_GRADE" \
-  '{
-    url: $url,
-    timestamp: $timestamp,
-    version: $version,
-    overall: { score: $overallScore, grade: $overallGrade },
-    bots: $bots,
-    categories: {
-      accessibility:     { score: $catAcc,        grade: $catAccGrade },
-      contentVisibility: { score: $catContent,    grade: $catContentGrade },
-      structuredData:    { score: $catStructured, grade: $catStructuredGrade },
-      technicalSignals:  { score: $catTechnical,  grade: $catTechnicalGrade },
-      aiReadiness:       { score: $catAi,         grade: $catAiGrade }
-    }
-  }'