imgstat 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/analyze.sh +94 -42
- package/lib/utils.sh +81 -0
- package/package.json +1 -1
package/lib/analyze.sh
CHANGED
|
@@ -5,88 +5,140 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/utils.sh"
|
|
|
5
5
|
cmd_analyze() {
|
|
6
6
|
local dir="$1"
|
|
7
7
|
echo "Analyzing codebase for remote image references in $dir..."
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
-type d \( -name "node_modules" -o -name ".git" -o -name "dist" -o -name "build" \) -prune -o \
|
|
17
|
-
-type f \( -name "*.html" -o -name "*.jsx" -o -name "*.tsx" -o -name "*.js" -o -name "*.ts" -o -name "*.vue" -o -name "*.css" -o -name "*.scss" \) \
|
|
8
|
+
|
|
9
|
+
# ── Step 1: Extract all http/https URLs from common code files ──────────────
|
|
10
|
+
local all_urls=()
|
|
11
|
+
mapfile -t all_urls < <(find "$dir" \
|
|
12
|
+
-type d \( -name "node_modules" -o -name ".git" -o -name "dist" -o -name "build" -o -name "vendor" -o -name ".next" -o -name "coverage" \) -prune -o \
|
|
13
|
+
-type f \( -name "*.html" -o -name "*.jsx" -o -name "*.tsx" -o -name "*.js" -o -name "*.ts" \
|
|
14
|
+
-o -name "*.vue" -o -name "*.css" -o -name "*.scss" -o -name "*.php" \
|
|
15
|
+
-o -name "*.py" -o -name "*.rb" -o -name "*.svelte" -o -name "*.astro" \) \
|
|
18
16
|
-exec grep -oE "https?://[^\"')[:space:]]+" {} + 2>/dev/null | sort -u)
|
|
19
17
|
|
|
20
|
-
if [[ ${#
|
|
18
|
+
if [[ ${#all_urls[@]} -eq 0 ]]; then
|
|
21
19
|
echo "No remote URLs found in code files."
|
|
22
20
|
return 0
|
|
23
21
|
fi
|
|
24
|
-
|
|
25
|
-
echo "Found ${#
|
|
26
|
-
|
|
27
|
-
|
|
22
|
+
|
|
23
|
+
echo "Found ${#all_urls[@]} unique URL(s). Classifying..."
|
|
24
|
+
echo ""
|
|
25
|
+
|
|
26
|
+
# ── Step 2: Classify each URL via 3-tier pipeline ──────────────────────────
|
|
27
|
+
local image_extensions=("jpg" "jpeg" "png" "gif" "webp" "avif" "svg" "bmp" "tiff" "tif" "ico")
|
|
28
|
+
|
|
29
|
+
local queued_urls=()
|
|
30
|
+
|
|
31
|
+
for url in "${all_urls[@]}"; do
|
|
32
|
+
# Strip query/fragment to inspect the path cleanly
|
|
33
|
+
local path
|
|
34
|
+
path=$(echo "$url" | sed 's/[?#].*//')
|
|
35
|
+
local lower_path
|
|
36
|
+
lower_path=$(echo "$path" | tr '[:upper:]' '[:lower:]')
|
|
37
|
+
|
|
38
|
+
# ── Tier 1: Obvious non-image check (fast, no network) ──────────────────
|
|
39
|
+
if is_obvious_non_image "$url"; then
|
|
40
|
+
printf " \033[2m⏭ Skip (pattern match) : %s\033[0m\n" "$url"
|
|
41
|
+
continue
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# ── Tier 2: Image extension in path (fast, no network) ───────────────────
|
|
45
|
+
local ext
|
|
46
|
+
ext=$(echo "${lower_path##*.}" | sed 's/[^a-z]//g')
|
|
47
|
+
local is_image_ext=false
|
|
48
|
+
for img_ext in "${image_extensions[@]}"; do
|
|
49
|
+
if [[ "$ext" == "$img_ext" ]]; then
|
|
50
|
+
is_image_ext=true
|
|
51
|
+
break
|
|
52
|
+
fi
|
|
53
|
+
done
|
|
54
|
+
|
|
55
|
+
if [[ "$is_image_ext" == "true" ]]; then
|
|
56
|
+
printf " \033[1;32m✓ Queue (extension) : %s\033[0m\n" "$url"
|
|
57
|
+
queued_urls+=("$url")
|
|
58
|
+
continue
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
# ── Tier 3: HTTP HEAD Content-Type check (network, no body download) ─────
|
|
62
|
+
printf " \033[33m? Check (HEAD request) : %s\033[0m" "$url"
|
|
63
|
+
if check_content_type_is_image "$url"; then
|
|
64
|
+
printf "\r \033[1;32m✓ Queue (content-type) : %s\033[0m\n" "$url"
|
|
65
|
+
queued_urls+=("$url")
|
|
66
|
+
else
|
|
67
|
+
printf "\r \033[2m⏭ Skip (not image CT) : %s\033[0m\n" "$url"
|
|
68
|
+
fi
|
|
69
|
+
done
|
|
70
|
+
|
|
71
|
+
echo ""
|
|
72
|
+
|
|
73
|
+
if [[ ${#queued_urls[@]} -eq 0 ]]; then
|
|
74
|
+
echo "No image URLs found after classification."
|
|
75
|
+
return 0
|
|
76
|
+
fi
|
|
77
|
+
|
|
78
|
+
echo "Fetching dimensions for ${#queued_urls[@]} image URL(s)..."
|
|
79
|
+
echo ""
|
|
80
|
+
|
|
81
|
+
# ── Step 3: Download & measure queued image URLs ───────────────────────────
|
|
28
82
|
local TEMP_DIR
|
|
29
83
|
TEMP_DIR=$(mktemp -d)
|
|
30
84
|
trap 'rm -rf "$TEMP_DIR"' EXIT
|
|
31
|
-
|
|
85
|
+
|
|
32
86
|
# Output file setup
|
|
33
87
|
local rules_dir="$dir/.agent/rules"
|
|
34
88
|
local rules_file="$rules_dir/image_dimensions.md"
|
|
35
|
-
|
|
36
89
|
mkdir -p "$rules_dir"
|
|
37
|
-
|
|
38
|
-
# Write header
|
|
90
|
+
|
|
91
|
+
# Write header
|
|
39
92
|
cat << 'EOF' > "$rules_file"
|
|
40
93
|
# Codebase Remote Images
|
|
41
94
|
|
|
42
95
|
> [!NOTE]
|
|
43
|
-
> This file is strictly auto-generated by
|
|
96
|
+
> This file is strictly auto-generated by `imgstat`.
|
|
44
97
|
> It maps remote image URLs found in the codebase to their exact physical dimensions.
|
|
45
98
|
> AI assistants should use this dictionary when asked about layout constraints, native sizes, or aspect ratios of referenced images.
|
|
46
99
|
|
|
47
100
|
| Documented URL | Detected Size (W x H) |
|
|
48
101
|
|---|---|
|
|
49
102
|
EOF
|
|
50
|
-
|
|
103
|
+
|
|
51
104
|
local processed=0
|
|
52
|
-
|
|
53
|
-
for url in "${
|
|
54
|
-
#
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
105
|
+
|
|
106
|
+
for url in "${queued_urls[@]}"; do
|
|
107
|
+
# Download the image content only
|
|
108
|
+
wget -q --content-disposition --max-redirect=5 \
|
|
109
|
+
--user-agent="Mozilla/5.0" \
|
|
110
|
+
-P "$TEMP_DIR" "$url" 2>/dev/null || true
|
|
111
|
+
|
|
60
112
|
local all_files=( "$TEMP_DIR"/* )
|
|
61
|
-
local
|
|
62
|
-
|
|
113
|
+
local found_image=""
|
|
114
|
+
|
|
63
115
|
for file in "${all_files[@]}"; do
|
|
64
|
-
|
|
116
|
+
[[ ! -f "$file" ]] && continue
|
|
65
117
|
local mimetype
|
|
66
118
|
mimetype=$(file -b --mime-type "$file" 2>/dev/null || true)
|
|
67
119
|
if [[ "$mimetype" == image/* ]]; then
|
|
68
|
-
|
|
120
|
+
found_image="$file"
|
|
121
|
+
break
|
|
69
122
|
fi
|
|
70
123
|
done
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if [[ ${#found_images[@]} -gt 0 ]]; then
|
|
74
|
-
local file="${found_images[0]}"
|
|
124
|
+
|
|
125
|
+
if [[ -n "$found_image" ]]; then
|
|
75
126
|
local dim
|
|
76
|
-
dim=$(get_dimensions "$
|
|
77
|
-
|
|
127
|
+
dim=$(get_dimensions "$found_image" || true)
|
|
78
128
|
if [[ -n "$dim" ]]; then
|
|
79
129
|
local w="${dim% *}"
|
|
80
130
|
local h="${dim#* }"
|
|
81
131
|
echo "| \`$url\` | **${w}x${h}** |" >> "$rules_file"
|
|
132
|
+
printf " \033[1;32m📐 %s → %sx%s\033[0m\n" "$url" "$w" "$h"
|
|
82
133
|
processed=$((processed + 1))
|
|
83
134
|
fi
|
|
84
135
|
fi
|
|
85
|
-
|
|
86
|
-
# Clean temp dir
|
|
136
|
+
|
|
137
|
+
# Clean temp dir for next URL to avoid collisions
|
|
87
138
|
rm -rf "${TEMP_DIR:?}"/*
|
|
88
139
|
done
|
|
89
|
-
|
|
140
|
+
|
|
141
|
+
echo ""
|
|
90
142
|
echo "Analysis complete!"
|
|
91
143
|
echo "Documented $processed valid image reference(s) into: $rules_file"
|
|
92
144
|
echo "This file can now be read by autonomous coding agents."
|
package/lib/utils.sh
CHANGED
|
@@ -62,3 +62,84 @@ format_filename() {
|
|
|
62
62
|
|
|
63
63
|
echo "$dirname/$new_name"
|
|
64
64
|
}
|
|
65
|
+
|
|
66
|
+
# Returns 0 (true) if the URL is obviously NOT an image.
# Uses domain and path-extension heuristics — no network call needed.
# Usage: is_obvious_non_image "https://..."
# Arguments: $1 - http(s) URL to classify
# Returns:   0 if the URL is clearly not an image, 1 otherwise
is_obvious_non_image() {
  local url="$1"

  # Strip query string / fragment for path inspection.
  # Pure parameter expansion (cut at the first '?' or '#') — avoids
  # forking an `echo | sed` pipeline for every URL scanned.
  local path="${url%%[?#]*}"

  # --- Known non-image domains ---
  local non_image_domains=(
    "fonts.googleapis.com"
    "fonts.gstatic.com"
    "googletagmanager.com"
    "google-analytics.com"
    "www.google-analytics.com"
    "youtube.com"
    "www.youtube.com"
    "youtu.be"
    "vimeo.com"
    "www.vimeo.com"
    "maps.google.com"
    "maps.googleapis.com"
    "plausible.io"
    "cdn.rawgit.com"
    "ajax.googleapis.com"
    "cdnjs.cloudflare.com"
  )
  # Host part: drop the scheme, then everything from the first slash on.
  local domain="${url#*://}"
  domain="${domain%%/*}"
  for d in "${non_image_domains[@]}"; do
    if [[ "$domain" == "$d" ]]; then
      return 0
    fi
  done

  # --- Non-image path extensions ---
  # ${path,,} is the bash 4+ lowercase expansion; the codebase already
  # requires bash 4 (mapfile is used in analyze.sh).
  local lower_path="${path,,}"
  local non_image_exts=(".js" ".mjs" ".css" ".scss" ".json" ".xml" ".html"
    ".woff" ".woff2" ".ttf" ".eot" ".otf"
    ".pdf" ".zip" ".tar" ".gz"
    ".mp4" ".mp3" ".ogg" ".webm" ".avi" ".mov"
    ".txt" ".md" ".csv"
  )
  for ext in "${non_image_exts[@]}"; do
    # Suffix match: ".js" does not accidentally match ".json" (that ends
    # in "n") nor ".mjs" (no dot before "js").
    if [[ "$lower_path" == *"$ext" ]]; then
      return 0
    fi
  done

  # --- Non-image URL path fragments ---
  # NOTE(review): substring match, so "/feed" also skips e.g. "/feedback"
  # paths — acceptable for a fast heuristic tier, but worth confirming.
  local non_image_patterns=("/api/" "/feed" "/rss" "/sitemap" "/graphql" "/oauth" "/auth/" "/login" "/logout")
  for pat in "${non_image_patterns[@]}"; do
    if [[ "$lower_path" == *"$pat"* ]]; then
      return 0
    fi
  done

  return 1
}
|
|
128
|
+
|
|
129
|
+
# Returns 0 (true) if the URL's HTTP Content-Type header indicates an image.
# Uses a lightweight HEAD request — no body downloaded.
# Usage: check_content_type_is_image "https://..."
# Arguments: $1 - http(s) URL to probe
# Returns:   0 if the final response reports an image/* content type,
#            1 on any other content type, timeout, or curl failure
check_content_type_is_image() {
  local url="$1"
  local content_type
  # Ask curl for the final (post-redirect) content type directly via
  # --write-out '%{content_type}' instead of hand-parsing the raw header
  # dump with grep/tail/tr — robust across redirect chains and proxies.
  content_type=$(curl -sI --max-time 8 --location \
    -H "User-Agent: Mozilla/5.0" \
    -o /dev/null -w '%{content_type}' \
    "$url" 2>/dev/null) || return 1
  # Header field VALUES are case-insensitive (RFC 9110): normalize so
  # e.g. "Image/PNG" is recognized too.
  [[ "${content_type,,}" == *"image/"* ]]
}
|