sifr-benchmark 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ """
2
+ SiFR Benchmark - Evaluate LLM understanding of web UI across formats.
3
+
4
+ Usage:
5
+ pip install sifr-benchmark
6
+ sifr-bench --help
7
+ """
8
+
9
+ __version__ = "0.1.15"
10
+ __author__ = "SiFR Contributors"
11
+
12
+ from .runner import BenchmarkRunner
13
+ from .scoring import score_response
14
+ from .formats import load_sifr, load_html, load_axtree
15
+
16
+ __all__ = [
17
+ "BenchmarkRunner",
18
+ "score_response",
19
+ "load_sifr",
20
+ "load_html",
21
+ "load_axtree",
22
+ ]
@@ -0,0 +1,242 @@
1
+ """
2
+ Page capture module - captures pages in all formats.
3
+ """
4
+
5
+ import json
6
+ import asyncio
7
+ from pathlib import Path
8
+ from typing import Optional
9
+ from dataclasses import dataclass
10
+
11
+
12
@dataclass
class CaptureResult:
    """Outcome of capturing one page: paths to the saved artifacts.

    Every path is None if its format was not captured (or capture
    failed); ``error`` carries the failure message, if any.
    """

    url: str  # URL that was captured
    sifr_path: Optional[Path] = None  # SiFR JSON file
    html_path: Optional[Path] = None  # raw HTML dump
    screenshot_path: Optional[Path] = None  # PNG screenshot
    axtree_path: Optional[Path] = None  # accessibility-tree JSON
    error: Optional[str] = None  # set when the capture failed
20
+
21
+
22
def check_playwright():
    """Return True when Playwright's sync API is importable, else False."""
    try:
        from playwright.sync_api import sync_playwright  # noqa: F401
    except ImportError:
        return False
    return True
29
+
30
+
31
def install_playwright_browsers():
    """Download Playwright's Chromium bundle (blocking subprocess call).

    Raises CalledProcessError if the installer exits non-zero.
    """
    import subprocess

    cmd = ["playwright", "install", "chromium"]
    subprocess.run(cmd, check=True)
35
+
36
+
37
def generate_sifr_from_page(page) -> dict:
    """Generate SiFR v2.0 format from a Playwright page.

    Runs in-page JavaScript to extract up to 10 buttons and 5 inputs
    (high salience), up to 20 links (medium) and all h1-h3 headings
    (low), then wraps them in the SiFR envelope.

    Args:
        page: A loaded Playwright ``Page`` (sync API).

    Returns:
        dict with "====METADATA====", "====NODES====" and
        "====SUMMARY====" sections.
    """
    # Basic page info for the METADATA section.
    url = page.url
    title = page.title()
    viewport = page.viewport_size

    # Extract elements using JavaScript executed in the page context.
    # BUGFIX: the old script seeded `results.med.link = {}` — a key that
    # was never written to, but inflated the "med" count in stats by one.
    elements = page.evaluate("""() => {
        const results = { high: {}, med: {}, low: {} };
        const buttons = document.querySelectorAll('button, [role="button"], input[type="submit"]');
        const links = document.querySelectorAll('a[href]');
        const inputs = document.querySelectorAll('input, textarea, select');
        const headings = document.querySelectorAll('h1, h2, h3');

        let btnCount = 1, linkCount = 1, inputCount = 1, textCount = 1;

        // High salience: buttons
        buttons.forEach((el, i) => {
            if (i < 10) {
                const rect = el.getBoundingClientRect();
                results.high['btn' + String(btnCount++).padStart(3, '0')] = {
                    type: 'button',
                    text: el.textContent?.trim().slice(0, 50) || el.value || '',
                    position: [Math.round(rect.x), Math.round(rect.y), Math.round(rect.width), Math.round(rect.height)],
                    state: el.disabled ? 'disabled' : 'enabled'
                };
            }
        });

        // High salience: main inputs
        inputs.forEach((el, i) => {
            if (i < 5) {
                const rect = el.getBoundingClientRect();
                results.high['inp' + String(inputCount++).padStart(3, '0')] = {
                    type: 'input',
                    placeholder: el.placeholder || '',
                    input_type: el.type || 'text',
                    position: [Math.round(rect.x), Math.round(rect.y), Math.round(rect.width), Math.round(rect.height)],
                    state: el.disabled ? 'disabled' : 'enabled'
                };
            }
        });

        // Med salience: links
        links.forEach((el, i) => {
            if (i < 20) {
                const rect = el.getBoundingClientRect();
                results.med['lnk' + String(linkCount++).padStart(3, '0')] = {
                    type: 'link',
                    text: el.textContent?.trim().slice(0, 50) || '',
                    href: el.href,
                    position: [Math.round(rect.x), Math.round(rect.y), Math.round(rect.width), Math.round(rect.height)]
                };
            }
        });

        // Low salience: headings as text
        headings.forEach((el, i) => {
            const rect = el.getBoundingClientRect();
            results.low['txt' + String(textCount++).padStart(3, '0')] = {
                type: 'text',
                content: el.textContent?.trim().slice(0, 100) || '',
                tag: el.tagName.toLowerCase(),
                position: [Math.round(rect.x), Math.round(rect.y), Math.round(rect.width), Math.round(rect.height)]
            };
        });

        return results;
    }""")

    # Build the SiFR envelope around the extracted node buckets.
    sifr = {
        "====METADATA====": {
            "format": "sifr-v2.0",
            "url": url,
            "title": title,
            "viewport": viewport,
            "stats": {
                "high": len(elements.get("high", {})),
                "med": len(elements.get("med", {})),
                "low": len(elements.get("low", {}))
            }
        },
        "====NODES====": elements,
        "====SUMMARY====": {
            "page": {
                "purpose": f"Page at {url}",
                "title": title
            }
        }
    }

    return sifr
133
+
134
+
135
def get_accessibility_tree(page) -> dict:
    """Return the page's accessibility snapshot, or {} when unavailable."""
    tree = page.accessibility.snapshot()
    if not tree:
        return {}
    return tree
139
+
140
+
141
def capture_page(
    url: str,
    output_dir: Path,
    name: str,
    formats: Optional[list[str]] = None,
    headless: bool = True
) -> CaptureResult:
    """
    Capture a page in multiple formats.

    Args:
        url: URL to capture
        output_dir: Output directory (sifr/, html/, screenshots/ and
            axtree/ subdirectories are created under it)
        name: Base name for files
        formats: List of formats to capture (sifr, html, screenshot,
            axtree); defaults to all four
        headless: Run browser in headless mode

    Returns:
        CaptureResult with paths to captured files; on failure the paths
        stay None and ``error`` holds the message.
    """
    if not check_playwright():
        return CaptureResult(url=url, error="Playwright not installed. Run: pip install playwright && playwright install chromium")

    # BUGFIX: annotation was `formats: list[str] = None`, which is an
    # invalid type for a None default — now Optional[list[str]].
    if formats is None:
        formats = ["sifr", "html", "screenshot", "axtree"]

    from playwright.sync_api import sync_playwright

    result = CaptureResult(url=url)

    # Create all output directories up front so writes cannot fail on a
    # missing parent.
    output_dir = Path(output_dir)
    for sub in ("sifr", "html", "screenshots", "axtree"):
        (output_dir / sub).mkdir(parents=True, exist_ok=True)

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=headless)
            # BUGFIX: close the browser even when a capture step raises,
            # instead of leaking it until sync_playwright() unwinds.
            try:
                page = browser.new_page(viewport={"width": 1920, "height": 1080})

                # Navigate and allow dynamic content to settle.
                page.goto(url, wait_until="networkidle", timeout=30000)
                page.wait_for_timeout(2000)  # Extra wait for dynamic content

                # Capture SiFR
                if "sifr" in formats:
                    sifr_data = generate_sifr_from_page(page)
                    sifr_path = output_dir / "sifr" / f"{name}.sifr"
                    with open(sifr_path, "w", encoding="utf-8") as f:
                        json.dump(sifr_data, f, indent=2)
                    result.sifr_path = sifr_path

                # Capture HTML
                if "html" in formats:
                    html_content = page.content()
                    html_path = output_dir / "html" / f"{name}.html"
                    with open(html_path, "w", encoding="utf-8") as f:
                        f.write(html_content)
                    result.html_path = html_path

                # Capture Screenshot (viewport only, not full page)
                if "screenshot" in formats:
                    screenshot_path = output_dir / "screenshots" / f"{name}.png"
                    page.screenshot(path=str(screenshot_path), full_page=False)
                    result.screenshot_path = screenshot_path

                # Capture Accessibility Tree
                if "axtree" in formats:
                    axtree = get_accessibility_tree(page)
                    axtree_path = output_dir / "axtree" / f"{name}.json"
                    with open(axtree_path, "w", encoding="utf-8") as f:
                        json.dump(axtree, f, indent=2)
                    result.axtree_path = axtree_path
            finally:
                browser.close()

    except Exception as e:
        # Report failures on the result instead of raising, so batch
        # callers (capture_multiple) can keep going.
        result.error = str(e)

    return result
223
+
224
+
225
def capture_multiple(
    urls: list[str],
    output_dir: Path,
    formats: Optional[list[str]] = None,
    headless: bool = True
) -> list[CaptureResult]:
    """Capture multiple pages sequentially.

    Args:
        urls: URLs to capture.
        output_dir: Directory passed through to capture_page.
        formats: Formats to capture; None means all.
        headless: Run the browser headless.

    Returns:
        One CaptureResult per URL, in input order.
    """
    # BUGFIX: urlparse was re-imported on every loop iteration; hoisted.
    from urllib.parse import urlparse

    results = []
    for url in urls:
        # Derive a file-safe base name from the URL's host.
        # NOTE(review): two URLs on the same host produce the same name
        # and overwrite each other's output files — confirm intended.
        parsed = urlparse(url)
        name = parsed.netloc.replace(".", "_").replace("www_", "")

        result = capture_page(url, output_dir, name, formats, headless)
        results.append(result)

    return results
@@ -0,0 +1,230 @@
1
+ """
2
+ Capture pages using E2LLM extension API.
3
+ Requires: pip install playwright
4
+ First run: playwright install chromium
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ from dataclasses import dataclass
12
+
13
+
14
@dataclass
class CaptureResult:
    """Everything collected for one page via the E2LLM extension."""

    url: str  # source URL
    sifr: str  # SiFR payload as a JSON string ("" when capture failed)
    html: str  # full page HTML
    axtree: dict  # accessibility-tree snapshot
    screenshot: Optional[bytes] = None  # PNG bytes, when taken
21
+
22
+
23
async def capture_with_e2llm(
    page,
    selector: str = "body",
    timeout: int = 30000
) -> dict:
    """Ask the E2LLM browser extension to capture the current page.

    Dispatches an ``e2llm-capture-request`` CustomEvent in the page and
    waits up to ``timeout`` ms for the matching
    ``e2llm-capture-response``.

    Returns:
        dict with sifr (stringified), html, meta — plus an ``error`` key
        when the extension reported a failure.
    """
    # The whole handshake runs inside the page; the promise rejects on
    # timeout, which surfaces as an exception from evaluate().
    return await page.evaluate("""
    ([selector, timeout]) => {
        return new Promise((resolve, reject) => {
            const id = Date.now().toString();

            const timer = setTimeout(() => {
                reject(new Error('E2LLM capture timeout - is extension installed?'));
            }, timeout);

            document.addEventListener('e2llm-capture-response', (e) => {
                if (e.detail && e.detail.requestId === id) {
                    clearTimeout(timer);

                    // E2LLM v2.6.x returns: {requestId, success, data, meta}
                    // data contains the SiFR structure directly
                    const response = e.detail;

                    if (response.success && response.data) {
                        resolve({
                            sifr: JSON.stringify(response.data, null, 2),
                            meta: response.meta || {},
                            html: document.documentElement.outerHTML
                        });
                    } else {
                        resolve({
                            sifr: '',
                            meta: {},
                            html: document.documentElement.outerHTML,
                            error: response.error || 'Unknown error'
                        });
                    }
                }
            }, { once: true });

            document.dispatchEvent(new CustomEvent('e2llm-capture-request', {
                detail: {
                    requestId: id,
                    selector: selector
                }
            }));
        });
    }
    """, [selector, timeout])
80
+
81
+
82
async def capture_page(
    url: str,
    extension_path: str,
    user_data_dir: str = "./e2llm-chrome-profile",
    headless: bool = False,
    selector: str = "body"
) -> CaptureResult:
    """Capture one page with Playwright plus the E2LLM extension.

    Launches a persistent Chromium context with the extension loaded,
    navigates to ``url`` and collects SiFR, HTML, screenshot and
    accessibility tree into a CaptureResult.
    """
    from playwright.async_api import async_playwright

    launch_args = [
        f"--disable-extensions-except={extension_path}",
        f"--load-extension={extension_path}",
    ]

    async with async_playwright() as pw:
        context = await pw.chromium.launch_persistent_context(
            user_data_dir=user_data_dir,
            headless=headless,
            args=launch_args,
        )

        page = await context.new_page()

        try:
            await page.goto(url, wait_until="networkidle", timeout=30000)
            # Give the extension a moment to initialize on the page.
            await page.wait_for_timeout(2000)

            captured = await capture_with_e2llm(page, selector)
            shot = await page.screenshot(full_page=True)
            tree = await page.accessibility.snapshot()

            return CaptureResult(
                url=url,
                sifr=captured.get("sifr", ""),
                html=captured.get("html", ""),
                axtree=tree or {},
                screenshot=shot,
            )

        finally:
            await context.close()
124
+
125
+
126
async def capture_multiple(
    urls: list[str],
    extension_path: str,
    output_dir: str = "./datasets/formats",
    user_data_dir: str = "./e2llm-chrome-profile"
) -> list[CaptureResult]:
    """
    Capture multiple pages with the E2LLM extension, saving each format
    (sifr/html/axtree/screenshot) under ``output_dir``.

    Per-URL failures are logged and produce empty placeholder files so a
    downstream pipeline keyed on filenames keeps working.
    """
    from playwright.async_api import async_playwright

    output = Path(output_dir)
    for sub in ("sifr", "html", "axtree", "screenshots"):
        (output / sub).mkdir(parents=True, exist_ok=True)

    results: list[CaptureResult] = []

    async with async_playwright() as p:
        context = await p.chromium.launch_persistent_context(
            user_data_dir=user_data_dir,
            headless=False,
            args=[
                f"--disable-extensions-except={extension_path}",
                f"--load-extension={extension_path}",
            ]
        )

        # BUGFIX: close the context even when setup/navigation raises
        # outside the per-URL handler, so the browser doesn't leak.
        try:
            page = await context.new_page()

            for url in urls:
                # BUGFIX: page_id is now computed *before* the try block.
                # It used to be assigned after page.goto(), so a navigation
                # failure made the except handler crash with NameError on
                # the first URL — or silently overwrite the *previous*
                # URL's files with empty content on later ones.
                page_id = url.replace("https://", "").replace("http://", "")
                page_id = page_id.replace("/", "_").replace(".", "_").rstrip("_")

                try:
                    print(f"Capturing: {url}")

                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    await page.wait_for_timeout(2000)  # Wait for extension

                    result = await capture_with_e2llm(page)
                    screenshot = await page.screenshot(full_page=True)

                    # Get real accessibility tree via Playwright
                    axtree = await page.accessibility.snapshot()

                    sifr_content = result.get("sifr", "")
                    html_content = result.get("html", "")

                    # Save files
                    (output / "sifr" / f"{page_id}.sifr").write_text(
                        sifr_content, encoding="utf-8"
                    )
                    (output / "html" / f"{page_id}.html").write_text(
                        html_content, encoding="utf-8"
                    )
                    (output / "axtree" / f"{page_id}.json").write_text(
                        json.dumps(axtree, indent=2, ensure_ascii=False),
                        encoding="utf-8"
                    )
                    (output / "screenshots" / f"{page_id}.png").write_bytes(screenshot)

                    results.append(CaptureResult(
                        url=url,
                        sifr=sifr_content,
                        html=html_content,
                        axtree=axtree or {},
                        screenshot=screenshot
                    ))

                    sifr_size = len(sifr_content)
                    print(f" ✅ Saved: {page_id} (SiFR: {sifr_size} bytes)")

                    await page.wait_for_timeout(500)

                except Exception as e:
                    print(f" ❌ Error: {e}")
                    # Save empty files to avoid breaking pipeline
                    (output / "sifr" / f"{page_id}.sifr").write_text("", encoding="utf-8")
                    (output / "html" / f"{page_id}.html").write_text("", encoding="utf-8")
        finally:
            await context.close()

    return results
212
+
213
+
214
+ if __name__ == "__main__":
215
+ import argparse
216
+
217
+ parser = argparse.ArgumentParser(description="Capture pages using E2LLM extension")
218
+ parser.add_argument("urls", nargs="+", help="URLs to capture")
219
+ parser.add_argument("--extension", "-e", required=True, help="Path to E2LLM extension")
220
+ parser.add_argument("--output", "-o", default="./datasets/formats", help="Output directory")
221
+ parser.add_argument("--profile", default="./e2llm-chrome-profile", help="Chrome profile dir")
222
+
223
+ args = parser.parse_args()
224
+
225
+ asyncio.run(capture_multiple(
226
+ urls=args.urls,
227
+ extension_path=args.extension,
228
+ output_dir=args.output,
229
+ user_data_dir=args.profile
230
+ ))