PyPI - sibylline-scurl - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

sibylline-scurl 0.2.0 (py3-none-any.whl) → 0.2.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

scurl/cli.py CHANGED Viewed

@@ -11,7 +11,6 @@ from .middleware import (
 )
 from .request_middleware import SecretDefender
 from .response_middleware import ReadabilityExtractor
-from .prompt_defender import PromptInjectionDefender
 from .curl import parse_curl_args, execute_curl, curl_result_to_response_context
@@ -22,7 +21,7 @@ REQUEST_MIDDLEWARE = {
 RESPONSE_MIDDLEWARE = {
     "readability": ("ReadabilityExtractor", "Extracts clean markdown from HTML", ReadabilityExtractor),
-    "prompt-defender": ("PromptInjectionDefender", "Detects prompt injection in web content", PromptInjectionDefender),
+    "prompt-defender": ("PromptInjectionDefender", "Detects prompt injection in web content", None),  # Lazy loaded
 }
@@ -42,6 +41,7 @@ class ScurlFlags:
     """Parsed scurl-specific flags."""
     raw: bool = False
     render: bool = False
+    readability: bool = False
     disable: set[str] = field(default_factory=set)
     enable: set[str] = field(default_factory=set)
     list_middleware: bool = False
@@ -65,6 +65,9 @@ def extract_scurl_flags(args: list[str]) -> tuple[ScurlFlags, list[str]]:
         elif arg == "--render":
             flags.render = True
             i += 1
+        elif arg == "--readability":
+            flags.readability = True
+            i += 1
         elif arg == "--disable":
             if i + 1 < len(args):
                 flags.disable.add(args[i + 1])
@@ -119,6 +122,7 @@ def print_help() -> None:
     print("scurl-specific options:")
     print("  --raw                  Disable all response middleware (raw curl output)")
     print("  --render               Use headless browser for JS-rendered pages")
+    print("  --readability          Extract article content (strips nav, ads, etc.)")
     print("  --disable <middleware>  Disable a middleware by slug (can be repeated)")
     print("  --enable <middleware>  Enable an opt-in middleware (can be repeated)")
     print("  --list-middleware      List available middleware and their slugs")
@@ -213,9 +217,10 @@ def run(args: Optional[list[str]] = None) -> int:
     response_chain = ResponseMiddlewareChain()
     if not flags.raw:
         if "readability" not in flags.disable:
-            response_chain.add(ReadabilityExtractor())
+            response_chain.add(ReadabilityExtractor(use_readability=flags.readability))
         # Prompt defender is opt-in (requires --enable)
         if "prompt-defender" in flags.enable:
+            from .prompt_defender import PromptInjectionDefender
             response_chain.add(PromptInjectionDefender(
                 threshold=flags.injection_threshold,
                 action=flags.injection_action,

scurl/prompt_defender/middleware.py CHANGED Viewed

@@ -5,8 +5,6 @@ from dataclasses import dataclass
 from typing import Dict, List, Set, Tuple
 import json
-import numpy as np
 from ..middleware import ResponseMiddleware, ResponseContext, ResponseMiddlewareResult
 from .normalizer import TextNormalizer
 from .patterns import PatternExtractor, PATTERN_CATEGORIES
@@ -209,6 +207,8 @@ class PromptInjectionDefender(ResponseMiddleware):
         Returns:
             InjectionAnalysis with detection results.
         """
+        import numpy as np  # Lazy import - optional dependency
         # Normalize text to defeat obfuscation
         normalized = self._normalizer.normalize(text)

scurl/response_middleware.py CHANGED Viewed

@@ -20,11 +20,13 @@ class ReadabilityExtractor(ResponseMiddleware):
         include_images: bool = True,
         include_tables: bool = True,
         body_width: int = 0,
+        use_readability: bool = False,
     ):
         self._include_links = include_links
         self._include_images = include_images
         self._include_tables = include_tables
         self._body_width = body_width
+        self._use_readability = use_readability
     @property
     def name(self) -> str:
@@ -105,16 +107,18 @@ class ReadabilityExtractor(ResponseMiddleware):
     def process(self, context: ResponseContext) -> ResponseMiddlewareResult:
         """Extract markdown from HTML.
-        Tries readability + html2text first, falls back to html2text direct
-        for content readability can't handle.
+        By default uses html2text directly for full page content.
+        With use_readability=True, tries readability article extraction first.
         """
         html = context.body.decode("utf-8", errors="replace")
         url = context.url or ""
-        # Try readability + html2text first (best for article-like content)
-        result = self._extract_with_readability(html, url)
+        result = None
+        if self._use_readability:
+            # Try readability + html2text (best for article-like content)
+            result = self._extract_with_readability(html, url)
-        # Fall back to html2text direct (preserves links, may include boilerplate)
+        # Use html2text direct (full page, preserves all content)
         if not result:
             result = self._extract_with_html2text_direct(html)

{sibylline_scurl-0.2.0.dist-info → sibylline_scurl-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sibylline-scurl
-Version: 0.2.0
+Version: 0.2.3
 Summary: A secure curl wrapper with middleware support and HTML-to-markdown extraction
 Author: Nathan
 License: MIT
@@ -76,7 +76,7 @@ scurl -H "Accept: application/json" https://api.example.com/data
 ## Features
 - **SecretDefender**: Automatically detects and blocks requests containing exposed secrets/tokens
-- **ReadabilityExtractor**: Extracts clean markdown from HTML responses using readability + html2text
+- **HTML to Markdown**: Converts HTML responses to clean markdown (use `--readability` for article extraction)
 - **Middleware System**: Composable request and response middleware
 ## Why scurl?
@@ -124,7 +124,9 @@ was added later...
 | Flag | Description |
 |------|-------------|
-| `--raw` | Disable all response middleware |
+| `--raw` | Disable all response middleware (raw HTML output) |
+| `--readability` | Extract article content only (strips nav, ads, sidebars) |
+| `--render` | Use headless browser for JS-rendered pages |
 | `--disable <slug>` | Disable a middleware by slug (can be repeated) |
 | `--enable <slug>` | Override a middleware's block (can be repeated) |
 | `--list-middleware` | List available middleware and their slugs |

{sibylline_scurl-0.2.0.dist-info → sibylline_scurl-0.2.3.dist-info}/RECORD RENAMED Viewed

@@ -1,22 +1,22 @@
 scurl/__init__.py,sha256=ycKB1BvXjpJObD-zYujBLX0n1BGu9WpqkLvizv_JH8E,84
 scurl/browser.py,sha256=ICLz2AI8b9zmNunLbk__IArvJZ1dJv5e4GKcZaXIhcI,2466
-scurl/cli.py,sha256=mfi_YzCNlF3h7ipx9Unj2Y0cljmt1r584s3cI7tNSsc,8777
+scurl/cli.py,sha256=rqAinrbhnOdbiHMO4a-jtMTrTqQ5ZS7t58bZST5H-oI,9027
 scurl/curl.py,sha256=S4NfAd0VfYrTbjn0RyMwZk-C14AkBa7YOT6GfOdedz0,5648
 scurl/middleware.py,sha256=-On84ovv9y5U5Ti5oJOtEGQyunnk0nipFwOuLnvYphw,5275
 scurl/request_middleware.py,sha256=LLGwQJ96cj5lX0Umkwf0T61W6fEmwieBCKr0Is3EfQk,8202
-scurl/response_middleware.py,sha256=FeS9XaM191QyJRrXuGL9-D-g_tVzCQ9M8mCuvb4NjYg,5290
+scurl/response_middleware.py,sha256=QqbUs7OZ9k_XfJSD-IzOOEA7DQI6OzYWRoGUKwY82gI,5441
 scurl/sanitize.py,sha256=tSsLLHoSsohDFgaWOzrv9Qfzi-vzKUOmkRR-LcnMH6o,2144
 scurl/prompt_defender/__init__.py,sha256=2cGKzoG85MaWvaQNuCcsPYEKQ_TzC0E54o9hH-NiHvA,615
 scurl/prompt_defender/classifier.py,sha256=NEIN2oh2eiBoeGObFgG9NnE5n5qJktkttVAhgNE74t4,5993
 scurl/prompt_defender/embedder.py,sha256=VlyEkoTZd47bIBUaLfKy-flPO3YIrYqAiFRAzcHx31Q,8769
-scurl/prompt_defender/middleware.py,sha256=Ilx5p3mi9vgyTwWCU2Y2oigv9nqc8wIc-X8hRUP2keY,18121
+scurl/prompt_defender/middleware.py,sha256=H7p0TrQ1Mc5wjTIFR938X78JcuheTnoo1RDY890MPIw,18166
 scurl/prompt_defender/motifs.py,sha256=Rm606Lp0s0haz-uc8aURbeb3iGQExvIGJJ7KtEICC3M,11491
 scurl/prompt_defender/normalizer.py,sha256=IKwlbWXhQbem1pzuFjqR8HUssT6v1jpal4Y8KybSkY4,5026
 scurl/prompt_defender/patterns.py,sha256=d0fJjtkKwgK6kI-H-ucpHtStZi0yM2tBA9wiyEmFxjQ,9674
 scurl/prompt_defender/windowing.py,sha256=HJS-w6lsPWt1_SIDo5nG1m22r4vslWZyRHG8n-QqgH8,12598
 scurl/prompt_defender/models/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 scurl/prompt_defender/models/prompt_injection_rf.pkl,sha256=wW1NXC2xiExx2dj5D3uZivKlaZ_XrF59KxdUU4b5BQg,1321504
-sibylline_scurl-0.2.0.dist-info/METADATA,sha256=hxhwCJ2DSS2Dj8oKWuP-4_YJWDsQuVW2m9mWOvFgd_o,5113
-sibylline_scurl-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sibylline_scurl-0.2.0.dist-info/entry_points.txt,sha256=iza-x5PFyqniQVHvoFBlKKvr32aJCulbgqGdYC8grAA,41
-sibylline_scurl-0.2.0.dist-info/RECORD,,
+sibylline_scurl-0.2.3.dist-info/METADATA,sha256=ITs-GFqntnKrNCwqARy8TpKIpnZPkLhs9YuxvXCzVNo,5279
+sibylline_scurl-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sibylline_scurl-0.2.3.dist-info/entry_points.txt,sha256=iza-x5PFyqniQVHvoFBlKKvr32aJCulbgqGdYC8grAA,41
+sibylline_scurl-0.2.3.dist-info/RECORD,,

{sibylline_scurl-0.2.0.dist-info → sibylline_scurl-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{sibylline_scurl-0.2.0.dist-info → sibylline_scurl-0.2.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

sibylline-scurl 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl

sibylline-scurl 0.2.0 (py3-none-any.whl) → 0.2.3 (py3-none-any.whl)