imgstat 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/analyze.sh +94 -42
- package/lib/utils.sh +81 -0
- package/package.json +1 -1
package/lib/analyze.sh
CHANGED
|
@@ -5,88 +5,140 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/utils.sh"
|
|
|
5
5
|
cmd_analyze() {
|
|
6
6
|
local dir="$1"
|
|
7
7
|
echo "Analyzing codebase for remote image references in $dir..."
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
-type d \( -name "node_modules" -o -name ".git" -o -name "dist" -o -name "build" \) -prune -o \
|
|
17
|
-
-type f \( -name "*.html" -o -name "*.jsx" -o -name "*.tsx" -o -name "*.js" -o -name "*.ts" -o -name "*.vue" -o -name "*.css" -o -name "*.scss" \) \
|
|
8
|
+
|
|
9
|
+
# ── Step 1: Extract all http/https URLs from common code files ──────────────
|
|
10
|
+
local all_urls=()
|
|
11
|
+
mapfile -t all_urls < <(find "$dir" \
|
|
12
|
+
-type d \( -name "node_modules" -o -name ".git" -o -name "dist" -o -name "build" -o -name "vendor" -o -name ".next" -o -name "coverage" \) -prune -o \
|
|
13
|
+
-type f \( -name "*.html" -o -name "*.jsx" -o -name "*.tsx" -o -name "*.js" -o -name "*.ts" \
|
|
14
|
+
-o -name "*.vue" -o -name "*.css" -o -name "*.scss" -o -name "*.php" \
|
|
15
|
+
-o -name "*.py" -o -name "*.rb" -o -name "*.svelte" -o -name "*.astro" \) \
|
|
18
16
|
-exec grep -oE "https?://[^\"')[:space:]]+" {} + 2>/dev/null | sort -u)
|
|
19
17
|
|
|
20
|
-
if [[ ${#
|
|
18
|
+
if [[ ${#all_urls[@]} -eq 0 ]]; then
|
|
21
19
|
echo "No remote URLs found in code files."
|
|
22
20
|
return 0
|
|
23
21
|
fi
|
|
24
|
-
|
|
25
|
-
echo "Found ${#
|
|
26
|
-
|
|
27
|
-
|
|
22
|
+
|
|
23
|
+
echo "Found ${#all_urls[@]} unique URL(s). Classifying..."
|
|
24
|
+
echo ""
|
|
25
|
+
|
|
26
|
+
# ── Step 2: Classify each URL via 3-tier pipeline ──────────────────────────
|
|
27
|
+
local image_extensions=("jpg" "jpeg" "png" "gif" "webp" "avif" "svg" "bmp" "tiff" "tif" "ico")
|
|
28
|
+
|
|
29
|
+
local queued_urls=()
|
|
30
|
+
|
|
31
|
+
for url in "${all_urls[@]}"; do
|
|
32
|
+
# Strip query/fragment to inspect the path cleanly
|
|
33
|
+
local path
|
|
34
|
+
path=$(echo "$url" | sed 's/[?#].*//')
|
|
35
|
+
local lower_path
|
|
36
|
+
lower_path=$(echo "$path" | tr '[:upper:]' '[:lower:]')
|
|
37
|
+
|
|
38
|
+
# ── Tier 1: Obvious non-image check (fast, no network) ──────────────────
|
|
39
|
+
if is_obvious_non_image "$url"; then
|
|
40
|
+
printf " \033[2m⏭ Skip (pattern match) : %s\033[0m\n" "$url"
|
|
41
|
+
continue
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# ── Tier 2: Image extension in path (fast, no network) ───────────────────
|
|
45
|
+
local ext
|
|
46
|
+
ext=$(echo "${lower_path##*.}" | sed 's/[^a-z]//g')
|
|
47
|
+
local is_image_ext=false
|
|
48
|
+
for img_ext in "${image_extensions[@]}"; do
|
|
49
|
+
if [[ "$ext" == "$img_ext" ]]; then
|
|
50
|
+
is_image_ext=true
|
|
51
|
+
break
|
|
52
|
+
fi
|
|
53
|
+
done
|
|
54
|
+
|
|
55
|
+
if [[ "$is_image_ext" == "true" ]]; then
|
|
56
|
+
printf " \033[1;32m✓ Queue (extension) : %s\033[0m\n" "$url"
|
|
57
|
+
queued_urls+=("$url")
|
|
58
|
+
continue
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
# ── Tier 3: HTTP HEAD Content-Type check (network, no body download) ─────
|
|
62
|
+
printf " \033[33m? Check (HEAD request) : %s\033[0m" "$url"
|
|
63
|
+
if check_content_type_is_image "$url"; then
|
|
64
|
+
printf "\r \033[1;32m✓ Queue (content-type) : %s\033[0m\n" "$url"
|
|
65
|
+
queued_urls+=("$url")
|
|
66
|
+
else
|
|
67
|
+
printf "\r \033[2m⏭ Skip (not image CT) : %s\033[0m\n" "$url"
|
|
68
|
+
fi
|
|
69
|
+
done
|
|
70
|
+
|
|
71
|
+
echo ""
|
|
72
|
+
|
|
73
|
+
if [[ ${#queued_urls[@]} -eq 0 ]]; then
|
|
74
|
+
echo "No image URLs found after classification."
|
|
75
|
+
return 0
|
|
76
|
+
fi
|
|
77
|
+
|
|
78
|
+
echo "Fetching dimensions for ${#queued_urls[@]} image URL(s)..."
|
|
79
|
+
echo ""
|
|
80
|
+
|
|
81
|
+
# ── Step 3: Download & measure queued image URLs ───────────────────────────
|
|
28
82
|
local TEMP_DIR
|
|
29
83
|
TEMP_DIR=$(mktemp -d)
|
|
30
84
|
trap 'rm -rf "$TEMP_DIR"' EXIT
|
|
31
|
-
|
|
85
|
+
|
|
32
86
|
# Output file setup
|
|
33
87
|
local rules_dir="$dir/.agent/rules"
|
|
34
88
|
local rules_file="$rules_dir/image_dimensions.md"
|
|
35
|
-
|
|
36
89
|
mkdir -p "$rules_dir"
|
|
37
|
-
|
|
38
|
-
# Write header
|
|
90
|
+
|
|
91
|
+
# Write header
|
|
39
92
|
cat << 'EOF' > "$rules_file"
|
|
40
93
|
# Codebase Remote Images
|
|
41
94
|
|
|
42
95
|
> [!NOTE]
|
|
43
|
-
> This file is strictly auto-generated by
|
|
96
|
+
> This file is strictly auto-generated by `imgstat`.
|
|
44
97
|
> It maps remote image URLs found in the codebase to their exact physical dimensions.
|
|
45
98
|
> AI assistants should use this dictionary when asked about layout constraints, native sizes, or aspect ratios of referenced images.
|
|
46
99
|
|
|
47
100
|
| Documented URL | Detected Size (W x H) |
|
|
48
101
|
|---|---|
|
|
49
102
|
EOF
|
|
50
|
-
|
|
103
|
+
|
|
51
104
|
local processed=0
|
|
52
|
-
|
|
53
|
-
for url in "${
|
|
54
|
-
#
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
105
|
+
|
|
106
|
+
for url in "${queued_urls[@]}"; do
|
|
107
|
+
# Download the image content only
|
|
108
|
+
wget -q --content-disposition --max-redirect=5 \
|
|
109
|
+
--user-agent="Mozilla/5.0" \
|
|
110
|
+
-P "$TEMP_DIR" "$url" 2>/dev/null || true
|
|
111
|
+
|
|
60
112
|
local all_files=( "$TEMP_DIR"/* )
|
|
61
|
-
local
|
|
62
|
-
|
|
113
|
+
local found_image=""
|
|
114
|
+
|
|
63
115
|
for file in "${all_files[@]}"; do
|
|
64
|
-
|
|
116
|
+
[[ ! -f "$file" ]] && continue
|
|
65
117
|
local mimetype
|
|
66
118
|
mimetype=$(file -b --mime-type "$file" 2>/dev/null || true)
|
|
67
119
|
if [[ "$mimetype" == image/* ]]; then
|
|
68
|
-
|
|
120
|
+
found_image="$file"
|
|
121
|
+
break
|
|
69
122
|
fi
|
|
70
123
|
done
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if [[ ${#found_images[@]} -gt 0 ]]; then
|
|
74
|
-
local file="${found_images[0]}"
|
|
124
|
+
|
|
125
|
+
if [[ -n "$found_image" ]]; then
|
|
75
126
|
local dim
|
|
76
|
-
dim=$(get_dimensions "$
|
|
77
|
-
|
|
127
|
+
dim=$(get_dimensions "$found_image" || true)
|
|
78
128
|
if [[ -n "$dim" ]]; then
|
|
79
129
|
local w="${dim% *}"
|
|
80
130
|
local h="${dim#* }"
|
|
81
131
|
echo "| \`$url\` | **${w}x${h}** |" >> "$rules_file"
|
|
132
|
+
printf " \033[1;32m📐 %s → %sx%s\033[0m\n" "$url" "$w" "$h"
|
|
82
133
|
processed=$((processed + 1))
|
|
83
134
|
fi
|
|
84
135
|
fi
|
|
85
|
-
|
|
86
|
-
# Clean temp dir
|
|
136
|
+
|
|
137
|
+
# Clean temp dir for next URL to avoid collisions
|
|
87
138
|
rm -rf "${TEMP_DIR:?}"/*
|
|
88
139
|
done
|
|
89
|
-
|
|
140
|
+
|
|
141
|
+
echo ""
|
|
90
142
|
echo "Analysis complete!"
|
|
91
143
|
echo "Documented $processed valid image reference(s) into: $rules_file"
|
|
92
144
|
echo "This file can now be read by autonomous coding agents."
|
package/lib/utils.sh
CHANGED
|
@@ -62,3 +62,84 @@ format_filename() {
|
|
|
62
62
|
|
|
63
63
|
echo "$dirname/$new_name"
|
|
64
64
|
}
|
|
65
|
+
|
|
66
|
+
# Returns 0 (true) if the URL is obviously NOT an image.
# Uses domain and path-extension heuristics — no network call needed.
# Usage: is_obvious_non_image "https://..."
# Arguments: $1 - http(s) URL to classify
# Returns:   0 if the URL is clearly not an image, 1 otherwise
is_obvious_non_image() {
  local url="$1"

  # Strip query string / fragment for path inspection.
  # Pure parameter expansion (cut at the first '?' or '#') — avoids
  # forking an `echo | sed` pipeline for every URL scanned.
  local path="${url%%[?#]*}"

  # --- Known non-image domains ---
  local non_image_domains=(
    "fonts.googleapis.com"
    "fonts.gstatic.com"
    "googletagmanager.com"
    "google-analytics.com"
    "www.google-analytics.com"
    "youtube.com"
    "www.youtube.com"
    "youtu.be"
    "vimeo.com"
    "www.vimeo.com"
    "maps.google.com"
    "maps.googleapis.com"
    "plausible.io"
    "cdn.rawgit.com"
    "ajax.googleapis.com"
    "cdnjs.cloudflare.com"
  )
  # Host part: drop the scheme, then everything from the first slash on.
  local domain="${url#*://}"
  domain="${domain%%/*}"
  for d in "${non_image_domains[@]}"; do
    if [[ "$domain" == "$d" ]]; then
      return 0
    fi
  done

  # --- Non-image path extensions ---
  # ${path,,} is the bash 4+ lowercase expansion; the codebase already
  # requires bash 4 (mapfile is used in analyze.sh).
  local lower_path="${path,,}"
  local non_image_exts=(".js" ".mjs" ".css" ".scss" ".json" ".xml" ".html"
    ".woff" ".woff2" ".ttf" ".eot" ".otf"
    ".pdf" ".zip" ".tar" ".gz"
    ".mp4" ".mp3" ".ogg" ".webm" ".avi" ".mov"
    ".txt" ".md" ".csv"
  )
  for ext in "${non_image_exts[@]}"; do
    # Suffix match: ".js" does not accidentally match ".json" (that ends
    # in "n") nor ".mjs" (no dot before "js").
    if [[ "$lower_path" == *"$ext" ]]; then
      return 0
    fi
  done

  # --- Non-image URL path fragments ---
  # NOTE(review): substring match, so "/feed" also skips e.g. "/feedback"
  # paths — acceptable for a fast heuristic tier, but worth confirming.
  local non_image_patterns=("/api/" "/feed" "/rss" "/sitemap" "/graphql" "/oauth" "/auth/" "/login" "/logout")
  for pat in "${non_image_patterns[@]}"; do
    if [[ "$lower_path" == *"$pat"* ]]; then
      return 0
    fi
  done

  return 1
}
|
|
128
|
+
|
|
129
|
+
# Returns 0 (true) if the URL's HTTP Content-Type header indicates an image.
# Uses a lightweight HEAD request — no body downloaded.
# Usage: check_content_type_is_image "https://..."
# Arguments: $1 - http(s) URL to probe
# Returns:   0 if the final response reports an image/* content type,
#            1 on any other content type, timeout, or curl failure
check_content_type_is_image() {
  local url="$1"
  local content_type
  # Ask curl for the final (post-redirect) content type directly via
  # --write-out '%{content_type}' instead of hand-parsing the raw header
  # dump with grep/tail/tr — robust across redirect chains and proxies.
  content_type=$(curl -sI --max-time 8 --location \
    -H "User-Agent: Mozilla/5.0" \
    -o /dev/null -w '%{content_type}' \
    "$url" 2>/dev/null) || return 1
  # Header field VALUES are case-insensitive (RFC 9110): normalize so
  # e.g. "Image/PNG" is recognized too.
  [[ "${content_type,,}" == *"image/"* ]]
}
|