imgstat 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/analyze.sh CHANGED
@@ -5,88 +5,140 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/utils.sh"
5
5
# Scans a codebase for remote image URLs, classifies each one through a
# 3-tier pipeline (pattern skip → path-extension match → HTTP Content-Type
# probe), downloads the queued images, and records their pixel dimensions
# in .agent/rules/image_dimensions.md for consumption by coding agents.
#
# Arguments:
#   $1 - root directory of the codebase to analyze
# Relies on helpers from utils.sh: is_obvious_non_image,
# check_content_type_is_image, get_dimensions.
# Returns: 0 on success (including "nothing found").
cmd_analyze() {
  local dir="$1"
  echo "Analyzing codebase for remote image references in $dir..."

  # ── Step 1: Extract all http/https URLs from common code files ──────────────
  local all_urls=()
  mapfile -t all_urls < <(find "$dir" \
    -type d \( -name "node_modules" -o -name ".git" -o -name "dist" -o -name "build" -o -name "vendor" -o -name ".next" -o -name "coverage" \) -prune -o \
    -type f \( -name "*.html" -o -name "*.jsx" -o -name "*.tsx" -o -name "*.js" -o -name "*.ts" \
               -o -name "*.vue" -o -name "*.css" -o -name "*.scss" -o -name "*.php" \
               -o -name "*.py" -o -name "*.rb" -o -name "*.svelte" -o -name "*.astro" \) \
    -exec grep -oE "https?://[^\"')[:space:]]+" {} + 2>/dev/null | sort -u)

  if [[ ${#all_urls[@]} -eq 0 ]]; then
    echo "No remote URLs found in code files."
    return 0
  fi

  echo "Found ${#all_urls[@]} unique URL(s). Classifying..."
  echo ""

  # ── Step 2: Classify each URL via 3-tier pipeline ──────────────────────────
  local image_extensions=("jpg" "jpeg" "png" "gif" "webp" "avif" "svg" "bmp" "tiff" "tif" "ico")

  local queued_urls=()
  # Declare loop/work variables local so they do not leak into the caller.
  local url lower_path ext img_ext is_image_ext

  for url in "${all_urls[@]}"; do
    # ── Tier 1: Obvious non-image check (fast, no network) ──────────────────
    if is_obvious_non_image "$url"; then
      printf " \033[2m⏭ Skip (pattern match) : %s\033[0m\n" "$url"
      continue
    fi

    # ── Tier 2: Image extension in path (fast, no network) ───────────────────
    # Strip query/fragment, lowercase, take the trailing extension, and drop
    # stray non-letter characters — all via parameter expansion, so no
    # echo|sed|tr subshells are forked per URL.
    lower_path="${url%%[?#]*}"
    lower_path="${lower_path,,}"
    ext="${lower_path##*.}"
    ext="${ext//[^a-z]/}"
    is_image_ext=false
    for img_ext in "${image_extensions[@]}"; do
      if [[ "$ext" == "$img_ext" ]]; then
        is_image_ext=true
        break
      fi
    done

    if [[ "$is_image_ext" == "true" ]]; then
      printf " \033[1;32m✓ Queue (extension) : %s\033[0m\n" "$url"
      queued_urls+=("$url")
      continue
    fi

    # ── Tier 3: HTTP HEAD Content-Type check (network, no body download) ─────
    printf " \033[33m? Check (HEAD request) : %s\033[0m" "$url"
    if check_content_type_is_image "$url"; then
      printf "\r \033[1;32m✓ Queue (content-type) : %s\033[0m\n" "$url"
      queued_urls+=("$url")
    else
      printf "\r \033[2m⏭ Skip (not image CT) : %s\033[0m\n" "$url"
    fi
  done

  echo ""

  if [[ ${#queued_urls[@]} -eq 0 ]]; then
    echo "No image URLs found after classification."
    return 0
  fi

  echo "Fetching dimensions for ${#queued_urls[@]} image URL(s)..."
  echo ""

  # ── Step 3: Download & measure queued image URLs ───────────────────────────
  local TEMP_DIR
  TEMP_DIR=$(mktemp -d) || { echo "imgstat: mktemp failed" >&2; return 1; }
  # NOTE(review): EXIT traps fire at script exit (not function return) and
  # replace any previously installed EXIT trap — acceptable here since this
  # is the command entry point.
  trap 'rm -rf "$TEMP_DIR"' EXIT

  # Output file setup
  local rules_dir="$dir/.agent/rules"
  local rules_file="$rules_dir/image_dimensions.md"
  mkdir -p "$rules_dir"

  # Write header (quoted 'EOF' delimiter → no expansion inside the here-doc,
  # so the backticks land in the markdown literally).
  cat << 'EOF' > "$rules_file"
# Codebase Remote Images

> [!NOTE]
> This file is strictly auto-generated by `imgstat`.
> It maps remote image URLs found in the codebase to their exact physical dimensions.
> AI assistants should use this dictionary when asked about layout constraints, native sizes, or aspect ratios of referenced images.

| Documented URL | Detected Size (W x H) |
|---|---|
EOF

  local processed=0
  # 'saved' (not 'file') avoids shadowing the file(1) utility's name below.
  local saved mimetype found_image dim w h

  for url in "${queued_urls[@]}"; do
    # Download the image content only
    wget -q --content-disposition --max-redirect=5 \
      --user-agent="Mozilla/5.0" \
      -P "$TEMP_DIR" "$url" 2>/dev/null || true

    # Identify the first real image among whatever wget saved; the MIME type
    # check filters out HTML error pages and other non-image payloads.
    found_image=""
    for saved in "$TEMP_DIR"/*; do
      [[ ! -f "$saved" ]] && continue
      mimetype=$(file -b --mime-type "$saved" 2>/dev/null || true)
      if [[ "$mimetype" == image/* ]]; then
        found_image="$saved"
        break
      fi
    done

    if [[ -n "$found_image" ]]; then
      dim=$(get_dimensions "$found_image" || true)
      if [[ -n "$dim" ]]; then
        # get_dimensions emits "W H"; split on the single space.
        w="${dim% *}"
        h="${dim#* }"
        echo "| \`$url\` | **${w}x${h}** |" >> "$rules_file"
        printf " \033[1;32m📐 %s → %sx%s\033[0m\n" "$url" "$w" "$h"
        processed=$((processed + 1))
      fi
    fi

    # Clean temp dir for next URL to avoid collisions
    rm -rf "${TEMP_DIR:?}"/*
  done

  echo ""
  echo "Analysis complete!"
  echo "Documented $processed valid image reference(s) into: $rules_file"
  echo "This file can now be read by autonomous coding agents."
}
package/lib/utils.sh CHANGED
@@ -62,3 +62,84 @@ format_filename() {
62
62
 
63
63
  echo "$dirname/$new_name"
64
64
  }
65
+
66
# Returns 0 (true) if the URL is obviously NOT an image.
# Uses domain and path-extension heuristics — no network call needed.
# Fix over the exact-match original: the authority is normalized
# (userinfo/port stripped, lowercased) and matched by suffix, so any
# subdomain of a listed domain (e.g. m.youtube.com) is also caught
# instead of requiring every variant to be listed by hand.
# Usage: is_obvious_non_image "https://..."
is_obvious_non_image() {
  local url="$1"

  # Strip query string / fragment for path inspection (parameter
  # expansion only — no sed subshell).
  local path="${url%%[?#]*}"

  # --- Known non-image domains ---
  local non_image_domains=(
    "fonts.googleapis.com"
    "fonts.gstatic.com"
    "googletagmanager.com"
    "google-analytics.com"
    "www.google-analytics.com"
    "youtube.com"
    "www.youtube.com"
    "youtu.be"
    "vimeo.com"
    "www.vimeo.com"
    "maps.google.com"
    "maps.googleapis.com"
    "plausible.io"
    "cdn.rawgit.com"
    "ajax.googleapis.com"
    "cdnjs.cloudflare.com"
  )
  # Normalize the authority: drop scheme, path, userinfo, and port,
  # then lowercase for case-insensitive comparison.
  local domain="${url#*://}"
  domain="${domain%%/*}"
  domain="${domain#*@}"
  domain="${domain%%:*}"
  domain="${domain,,}"
  local d
  for d in "${non_image_domains[@]}"; do
    # Exact match, or any subdomain of a listed domain.
    if [[ "$domain" == "$d" || "$domain" == *".$d" ]]; then
      return 0
    fi
  done

  # --- Non-image path extensions ---
  local lower_path="${path,,}"
  local non_image_exts=(".js" ".mjs" ".css" ".scss" ".json" ".xml" ".html"
    ".woff" ".woff2" ".ttf" ".eot" ".otf"
    ".pdf" ".zip" ".tar" ".gz"
    ".mp4" ".mp3" ".ogg" ".webm" ".avi" ".mov"
    ".txt" ".md" ".csv"
  )
  local ext
  for ext in "${non_image_exts[@]}"; do
    if [[ "$lower_path" == *"$ext" ]]; then
      return 0
    fi
  done

  # --- Non-image URL path fragments ---
  local non_image_patterns=("/api/" "/feed" "/rss" "/sitemap" "/graphql" "/oauth" "/auth/" "/login" "/logout")
  local pat
  for pat in "${non_image_patterns[@]}"; do
    if [[ "$lower_path" == *"$pat"* ]]; then
      return 0
    fi
  done

  return 1
}
128
+
129
# Returns 0 (true) if the URL's HTTP Content-Type header indicates an image.
# Tries a lightweight HEAD request first (no body downloaded). Some servers
# reject HEAD outright (405) and yield no Content-Type at all; in that case
# fall back to a ranged GET (Range: bytes=0-0) that transfers at most one
# body byte before giving up.
# Usage: check_content_type_is_image "https://..."
check_content_type_is_image() {
  local url="$1"
  local content_type
  content_type=$(curl -sI --max-time 8 --location \
    -H "User-Agent: Mozilla/5.0" \
    "$url" 2>/dev/null \
    | grep -i '^content-type:' \
    | tail -1 \
    | tr -d '\r')

  if [[ -z "$content_type" ]]; then
    # HEAD gave nothing usable — retry with a 1-byte ranged GET and let
    # curl report the effective Content-Type via its write-out variable.
    content_type=$(curl -s --max-time 8 --location --range 0-0 \
      -H "User-Agent: Mozilla/5.0" \
      -o /dev/null -w '%{content_type}' \
      "$url" 2>/dev/null)
  fi

  if [[ "$content_type" == *"image/"* ]]; then
    return 0
  fi
  return 1
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "imgstat",
3
- "version": "2.0.4",
3
+ "version": "2.0.5",
4
4
  "description": "Embeds image dimensions directly into filenames for natural AI context.",
5
5
  "scripts": {
6
6
  "test": "echo \"Error: no test specified\" && exit 1"