@braedenbuilds/crawl-sim 1.0.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28):
  1. package/.claude-plugin/marketplace.json +15 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/README.md +32 -9
  4. package/bin/install.js +6 -2
  5. package/package.json +8 -3
  6. package/{SKILL.md → skills/crawl-sim/SKILL.md} +23 -2
  7. package/{scripts → skills/crawl-sim/scripts}/_lib.sh +30 -0
  8. package/skills/crawl-sim/scripts/compute-score.sh +744 -0
  9. package/{scripts → skills/crawl-sim/scripts}/extract-jsonld.sh +12 -0
  10. package/skills/crawl-sim/scripts/fetch-as-bot.sh +151 -0
  11. package/skills/crawl-sim/scripts/schema-fields.sh +25 -0
  12. package/scripts/compute-score.sh +0 -424
  13. package/scripts/fetch-as-bot.sh +0 -87
  14. /package/{profiles → skills/crawl-sim/profiles}/chatgpt-user.json +0 -0
  15. /package/{profiles → skills/crawl-sim/profiles}/claude-searchbot.json +0 -0
  16. /package/{profiles → skills/crawl-sim/profiles}/claude-user.json +0 -0
  17. /package/{profiles → skills/crawl-sim/profiles}/claudebot.json +0 -0
  18. /package/{profiles → skills/crawl-sim/profiles}/googlebot.json +0 -0
  19. /package/{profiles → skills/crawl-sim/profiles}/gptbot.json +0 -0
  20. /package/{profiles → skills/crawl-sim/profiles}/oai-searchbot.json +0 -0
  21. /package/{profiles → skills/crawl-sim/profiles}/perplexity-user.json +0 -0
  22. /package/{profiles → skills/crawl-sim/profiles}/perplexitybot.json +0 -0
  23. /package/{scripts → skills/crawl-sim/scripts}/check-llmstxt.sh +0 -0
  24. /package/{scripts → skills/crawl-sim/scripts}/check-robots.sh +0 -0
  25. /package/{scripts → skills/crawl-sim/scripts}/check-sitemap.sh +0 -0
  26. /package/{scripts → skills/crawl-sim/scripts}/diff-render.sh +0 -0
  27. /package/{scripts → skills/crawl-sim/scripts}/extract-links.sh +0 -0
  28. /package/{scripts → skills/crawl-sim/scripts}/extract-meta.sh +0 -0
@@ -1,87 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

# fetch-as-bot.sh — Fetch a URL as a specific bot User-Agent
# Usage: fetch-as-bot.sh <url> <profile.json>
#
# Reads .id, .name, .userAgent and .rendersJavaScript from <profile.json>,
# requests <url> with that User-Agent (following redirects, 30 s cap), and
# prints a single JSON object on stdout:
#   { url, bot: {id, name, userAgent, rendersJavaScript}, status,
#     timing: {total, ttfb}, size, wordCount, headers, bodyBase64 }
# A failed transfer is reported as status 0 with zeroed timing/size rather
# than aborting the script. Progress is logged to stderr.
# Requires: curl, jq, base64. count_words is expected to come from _lib.sh.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=_lib.sh
. "$SCRIPT_DIR/_lib.sh"

URL="${1:?Usage: fetch-as-bot.sh <url> <profile.json>}"
PROFILE="${2:?Usage: fetch-as-bot.sh <url> <profile.json>}"

BOT_ID=$(jq -r '.id' "$PROFILE")
BOT_NAME=$(jq -r '.name' "$PROFILE")
UA=$(jq -r '.userAgent' "$PROFILE")
RENDERS_JS=$(jq -r '.rendersJavaScript' "$PROFILE")

printf '[fetch-as-bot] %s <- %s\n' "$BOT_NAME" "$URL" >&2

TMPDIR="${TMPDIR:-/tmp}"
HEADERS_FILE=$(mktemp "$TMPDIR/crawlsim-headers.XXXXXX")
BODY_FILE=$(mktemp "$TMPDIR/crawlsim-body.XXXXXX")
trap 'rm -f "$HEADERS_FILE" "$BODY_FILE"' EXIT

# Metrics reported when the transfer itself fails (DNS, connect, timeout, ...).
FAILED_TIMING='{"total":0,"ttfb":0,"connect":0,"statusCode":0,"sizeDownload":0}'

# curl still prints its -w report when a transfer fails, so the fallback has
# to REPLACE the captured output; appending it would leave two JSON documents
# in TIMING and break every jq lookup below. curl's own diagnostics are
# discarded on purpose: failure is surfaced to the caller as status 0.
if ! TIMING=$(curl -sS -L \
  -H "User-Agent: $UA" \
  -D "$HEADERS_FILE" \
  -o "$BODY_FILE" \
  -w '{"total":%{time_total},"ttfb":%{time_starttransfer},"connect":%{time_connect},"statusCode":%{http_code},"sizeDownload":%{size_download}}' \
  --max-time 30 \
  "$URL" 2>/dev/null); then
  TIMING=$FAILED_TIMING
fi

STATUS=$(jq -r '.statusCode' <<<"$TIMING")
TOTAL_TIME=$(jq -r '.total' <<<"$TIMING")
TTFB=$(jq -r '.ttfb' <<<"$TIMING")
SIZE=$(jq -r '.sizeDownload' <<<"$TIMING")

# Parse response headers into a JSON object using jq for safe escaping.
# curl -L writes multiple blocks on redirect; jq keeps the last definition
# of each header since `add` overwrites left-to-right.
# grep exits 1 when nothing matches (e.g. no response was received); under
# `set -o pipefail` that would abort the script, so only a real grep error
# (status > 1) is allowed to propagate. Empty input yields {}.
HEADERS_JSON=$(tr -d '\r' < "$HEADERS_FILE" \
  | { grep -E '^[A-Za-z][A-Za-z0-9-]*:[[:space:]]' || [[ $? -eq 1 ]]; } \
  | jq -Rs '
      split("\n")
      | map(select(length > 0))
      | map(capture("^(?<k>[^:]+):[[:space:]]*(?<v>.*)$"))
      | map({(.k): .v})
      | add // {}
    ')

WORD_COUNT=$(count_words "$BODY_FILE")
if [[ -z "$WORD_COUNT" ]]; then
  WORD_COUNT=0
fi

# The body is shipped base64-encoded so arbitrary bytes survive the JSON
# round trip; an empty or missing body is reported as "".
BODY_B64=""
if [[ -s "$BODY_FILE" ]]; then
  BODY_B64=$(base64 < "$BODY_FILE")
fi

# rendersJavaScript is emitted as a JSON boolean when the profile value is
# "true"/"false"; any other value is passed through as a string.
jq -n \
  --arg url "$URL" \
  --arg botId "$BOT_ID" \
  --arg botName "$BOT_NAME" \
  --arg ua "$UA" \
  --arg rendersJs "$RENDERS_JS" \
  --argjson status "$STATUS" \
  --argjson totalTime "$TOTAL_TIME" \
  --argjson ttfb "$TTFB" \
  --argjson size "$SIZE" \
  --argjson wordCount "$WORD_COUNT" \
  --argjson headers "$HEADERS_JSON" \
  --arg bodyBase64 "$BODY_B64" \
  '{
    url: $url,
    bot: {
      id: $botId,
      name: $botName,
      userAgent: $ua,
      rendersJavaScript: (if $rendersJs == "true" then true elif $rendersJs == "false" then false else $rendersJs end)
    },
    status: $status,
    timing: { total: $totalTime, ttfb: $ttfb },
    size: $size,
    wordCount: $wordCount,
    headers: $headers,
    bodyBase64: $bodyBase64
  }'