sibylline-scurl 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[dev]" pytest-mock
28
+
29
+ - name: Run tests
30
+ run: pytest -v
31
+
32
+ lint:
33
+ runs-on: ubuntu-latest
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+
37
+ - name: Set up Python
38
+ uses: actions/setup-python@v5
39
+ with:
40
+ python-version: "3.12"
41
+
42
+ - name: Install ruff
43
+ run: pip install ruff
44
+
45
+ - name: Run ruff
46
+ run: ruff check src/ tests/
@@ -0,0 +1,71 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ build:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Install build dependencies
19
+ run: |
20
+ python -m pip install --upgrade pip
21
+ pip install build
22
+
23
+ - name: Build package
24
+ run: python -m build
25
+
26
+ - name: Upload artifacts
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: dist
30
+ path: dist/
31
+
32
+ test:
33
+ runs-on: ubuntu-latest
34
+ needs: build
35
+ steps:
36
+ - uses: actions/checkout@v4
37
+
38
+ - name: Set up Python
39
+ uses: actions/setup-python@v5
40
+ with:
41
+ python-version: "3.12"
42
+
43
+ - name: Download artifacts
44
+ uses: actions/download-artifact@v4
45
+ with:
46
+ name: dist
47
+ path: dist/
48
+
49
+ - name: Install from wheel
50
+ run: pip install dist/*.whl
51
+
52
+ - name: Test CLI
53
+ run: |
54
+ scurl --help
55
+ scurl --list-middleware
56
+
57
+ publish-pypi:
58
+ runs-on: ubuntu-latest
59
+ needs: [build, test]
60
+
61
+ steps:
62
+ - name: Download artifacts
63
+ uses: actions/download-artifact@v4
64
+ with:
65
+ name: dist
66
+ path: dist/
67
+
68
+ - name: Publish to PyPI
69
+ uses: pypa/gh-action-pypi-publish@release/v1
70
+ with:
71
+ password: ${{ secrets.PYPI_TOKEN }}
@@ -0,0 +1,43 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+
28
+ # Testing
29
+ .pytest_cache/
30
+ .coverage
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+
35
+ # IDEs
36
+ .idea/
37
+ .vscode/
38
+ *.swp
39
+ *.swo
40
+
41
+ # OS
42
+ .DS_Store
43
+ Thumbs.db
@@ -0,0 +1,81 @@
1
+ Metadata-Version: 2.4
2
+ Name: sibylline-scurl
3
+ Version: 0.1.0
4
+ Summary: A secure curl wrapper with middleware support and HTML-to-markdown extraction
5
+ Author: Nathan
6
+ License: MIT
7
+ Keywords: curl,markdown,security,web-scraping
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Security
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: trafilatura>=1.6.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
22
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # scurl
26
+
27
+ [![PyPI version](https://badge.fury.io/py/sibylline-scurl.svg)](https://badge.fury.io/py/sibylline-scurl)
28
+ [![CI](https://github.com/yourusername/scurl/actions/workflows/ci.yml/badge.svg)](https://github.com/yourusername/scurl/actions/workflows/ci.yml)
29
+
30
+ A secure curl wrapper with middleware support and HTML-to-markdown extraction.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install sibylline-scurl
36
+ ```
37
+
38
+ Or with [pipx](https://pipx.pypa.io/) (recommended for CLI tools):
39
+
40
+ ```bash
41
+ pipx install sibylline-scurl
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ ```bash
47
+ # Fetch a URL and extract clean markdown from HTML
48
+ scurl https://example.com
49
+
50
+ # Raw output (disable response middleware)
51
+ scurl --raw https://example.com
52
+
53
+ # All curl flags work
54
+ scurl -H "Accept: application/json" https://api.example.com/data
55
+ ```
56
+
57
+ ## Features
58
+
59
+ - **SecretDefender**: Automatically detects and blocks requests containing exposed secrets/tokens
60
+ - **TrafilaturaExtractor**: Extracts clean markdown from HTML responses
61
+ - **Middleware System**: Composable request and response middleware
62
+
63
+ ## Flags
64
+
65
+ | Flag | Description |
66
+ |------|-------------|
67
+ | `--raw` | Disable all response middleware |
68
+ | `--disable <slug>` | Disable a middleware by slug (can be repeated) |
69
+ | `--enable <slug>` | Override a middleware's block (can be repeated) |
70
+ | `--list-middleware` | List available middleware and their slugs |
71
+
72
+ ## Middleware Slugs
73
+
74
+ | Slug | Type | Description |
75
+ |------|------|-------------|
76
+ | `secret-defender` | Request | Detects and blocks requests containing secrets |
77
+ | `trafilatura` | Response | Extracts clean markdown from HTML |
78
+
79
+ ## License
80
+
81
+ MIT
@@ -0,0 +1,57 @@
1
+ # scurl
2
+
3
+ [![PyPI version](https://badge.fury.io/py/sibylline-scurl.svg)](https://badge.fury.io/py/sibylline-scurl)
4
+ [![CI](https://github.com/yourusername/scurl/actions/workflows/ci.yml/badge.svg)](https://github.com/yourusername/scurl/actions/workflows/ci.yml)
5
+
6
+ A secure curl wrapper with middleware support and HTML-to-markdown extraction.
7
+
8
+ ## Installation
9
+
10
+ ```bash
11
+ pip install sibylline-scurl
12
+ ```
13
+
14
+ Or with [pipx](https://pipx.pypa.io/) (recommended for CLI tools):
15
+
16
+ ```bash
17
+ pipx install sibylline-scurl
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ```bash
23
+ # Fetch a URL and extract clean markdown from HTML
24
+ scurl https://example.com
25
+
26
+ # Raw output (disable response middleware)
27
+ scurl --raw https://example.com
28
+
29
+ # All curl flags work
30
+ scurl -H "Accept: application/json" https://api.example.com/data
31
+ ```
32
+
33
+ ## Features
34
+
35
+ - **SecretDefender**: Automatically detects and blocks requests containing exposed secrets/tokens
36
+ - **TrafilaturaExtractor**: Extracts clean markdown from HTML responses
37
+ - **Middleware System**: Composable request and response middleware
38
+
39
+ ## Flags
40
+
41
+ | Flag | Description |
42
+ |------|-------------|
43
+ | `--raw` | Disable all response middleware |
44
+ | `--disable <slug>` | Disable a middleware by slug (can be repeated) |
45
+ | `--enable <slug>` | Override a middleware's block (can be repeated) |
46
+ | `--list-middleware` | List available middleware and their slugs |
47
+
48
+ ## Middleware Slugs
49
+
50
+ | Slug | Type | Description |
51
+ |------|------|-------------|
52
+ | `secret-defender` | Request | Detects and blocks requests containing secrets |
53
+ | `trafilatura` | Response | Extracts clean markdown from HTML |
54
+
55
+ ## License
56
+
57
+ MIT
@@ -0,0 +1,44 @@
1
+ [project]
2
+ name = "sibylline-scurl"
3
+ version = "0.1.0"
4
+ description = "A secure curl wrapper with middleware support and HTML-to-markdown extraction"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ authors = [{ name = "Nathan" }]
9
+ keywords = ["curl", "web-scraping", "security", "markdown"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Environment :: Console",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Topic :: Internet :: WWW/HTTP",
20
+ "Topic :: Security",
21
+ ]
22
+ dependencies = [
23
+ "trafilatura>=1.6.0",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ "pytest>=7.0.0",
29
+ "pytest-cov>=4.0.0",
30
+ ]
31
+
32
+ [project.scripts]
33
+ scurl = "scurl.cli:main"
34
+
35
+ [build-system]
36
+ requires = ["hatchling"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/scurl"]
41
+
42
+ [tool.pytest.ini_options]
43
+ testpaths = ["tests"]
44
+ pythonpath = ["src"]
@@ -0,0 +1,3 @@
1
+ """scurl - A secure curl wrapper with middleware support."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,194 @@
1
+ """CLI entry point for scurl."""
2
+
3
+ import sys
4
+ from dataclasses import dataclass, field
5
+ from typing import Optional
6
+
7
+ from .middleware import (
8
+ RequestMiddlewareChain,
9
+ ResponseMiddlewareChain,
10
+ RequestAction,
11
+ )
12
+ from .request_middleware import SecretDefender
13
+ from .response_middleware import TrafilaturaExtractor
14
+ from .curl import parse_curl_args, execute_curl, curl_result_to_response_context
15
+
16
+
17
# Middleware registries, keyed by the slug used on the command line.
# Each value is a (display name, description, middleware class) triple.
REQUEST_MIDDLEWARE = {
    "secret-defender": (
        "SecretDefender",
        "Detects and blocks requests containing secrets",
        SecretDefender,
    ),
}

RESPONSE_MIDDLEWARE = {
    "trafilatura": (
        "TrafilaturaExtractor",
        "Extracts clean markdown from HTML",
        TrafilaturaExtractor,
    ),
}
25
+
26
+
27
def print_middleware_list() -> None:
    """Print the request and response middleware registries to stdout.

    Each registry is written under its own heading, one entry per line in
    the form ``<slug> <name> - <description>`` with the slug left-aligned
    in a 20-character column. A blank line separates the two sections.
    """
    sections = (
        ("Request Middleware:", REQUEST_MIDDLEWARE),
        ("Response Middleware:", RESPONSE_MIDDLEWARE),
    )
    for position, (heading, registry) in enumerate(sections):
        if position:
            # Blank separator between sections, never before the first one.
            print()
        print(heading)
        for slug, entry in registry.items():
            name, desc, _middleware_cls = entry
            print(f" {slug:<20} {name} - {desc}")
36
+
37
+
38
@dataclass
class ScurlFlags:
    """Parsed scurl-specific flags."""

    # True when --raw was given.
    raw: bool = False
    # Slugs collected from --disable.
    disable: set[str] = field(default_factory=set)
    # Slugs collected from --enable.
    enable: set[str] = field(default_factory=set)
    # True when --list-middleware was given.
    list_middleware: bool = False
    # True when --help / -h was the first argument.
    help: bool = False


def extract_scurl_flags(args: list[str]) -> tuple[ScurlFlags, list[str]]:
    """Split scurl-specific flags from the arguments destined for curl.

    Recognised flags are ``--raw``, ``--list-middleware``,
    ``--disable <slug>`` / ``--disable=<slug>``,
    ``--enable <slug>`` / ``--enable=<slug>`` and, only as the very first
    argument, ``--help`` / ``-h``. Everything else is kept, in order, in the
    returned remainder.

    Args:
        args: Command-line arguments without the program name.

    Returns:
        A ``(flags, remaining_args)`` tuple. ``remaining_args`` holds every
        argument that was not consumed as a scurl flag, including a trailing
        ``--disable`` / ``--enable`` that has no value after it.
    """
    flags = ScurlFlags()
    remaining: list[str] = []
    # Options that take a slug, mapped to the set that collects their values.
    slug_targets = {"--disable": flags.disable, "--enable": flags.enable}

    position = 0
    while position < len(args):
        arg = args[position]
        position += 1  # ``position`` now indexes the token after ``arg``.
        name, equals, inline_value = arg.partition("=")

        if arg == "--raw":
            flags.raw = True
        elif arg == "--list-middleware":
            flags.list_middleware = True
        elif arg in ("--help", "-h") and position == 1:
            # Only the first argument is scurl's help; later occurrences are
            # left in the remainder for curl.
            flags.help = True
        elif arg in slug_targets and position < len(args):
            # Space-separated form: the next token is the slug.
            slug_targets[arg].add(args[position])
            position += 1
        elif equals and inline_value and name in slug_targets:
            # ``--disable=<slug>`` / ``--enable=<slug>`` form, which was
            # previously not recognised and ended up in the curl arguments.
            slug_targets[name].add(inline_value)
        else:
            # Anything else, including a slug option without a value.
            remaining.append(arg)

    return flags, remaining
85
+
86
+
87
def print_help() -> None:
    """Print scurl's usage line, its own options, and examples to stdout."""
    help_lines = (
        "scurl - A secure curl wrapper with middleware support",
        "",
        "Usage: scurl [scurl-options] [curl-options] <url>",
        "",
        "scurl-specific options:",
        " --raw Disable all response middleware (raw curl output)",
        " --disable <middleware> Disable a middleware by slug (can be repeated)",
        " --enable <middleware> Override a middleware's block (can be repeated)",
        " --list-middleware List available middleware and their slugs",
        " --help, -h Show this help (use curl --help for curl options)",
        "",
        "All other options are passed directly to curl.",
        "",
        "Examples:",
        " scurl https://example.com # Fetch and extract markdown",
        " scurl --raw https://example.com # Raw HTML output",
        " scurl --disable trafilatura https://example.com # Disable markdown extraction",
        " scurl --disable secret-defender https://... # Disable secret scanning",
        " scurl --enable secret-defender https://... # Override a secret block",
        " scurl -H 'Accept: application/json' https://api.example.com/data",
    )
    # Joining with newlines plus print's own trailing newline yields exactly
    # one newline per entry, the same as printing each entry on its own.
    print("\n".join(help_lines))
109
+
110
+
111
def run(args: Optional[list[str]] = None) -> int:
    """Run scurl with the given arguments and return the exit code.

    Steps, in order: separate scurl flags from curl arguments, run the
    request middleware chain, execute curl, run the response middleware
    chain, and write the resulting body to stdout as bytes.

    Args:
        args: Command-line arguments without the program name. When
            ``None``, ``sys.argv[1:]`` is used.

    Returns:
        ``0`` on success and after ``--help`` / ``--list-middleware``;
        ``1`` when no URL is given, when the request middleware chain blocks
        the request, or when the curl result carries return code ``-1``;
        otherwise curl's own non-zero return code.
    """
    if args is None:
        args = sys.argv[1:]

    # Extract scurl-specific flags
    flags, curl_args = extract_scurl_flags(args)

    if flags.help:
        print_help()
        return 0

    if flags.list_middleware:
        print_middleware_list()
        return 0

    # A misspelled slug used to be ignored without any feedback. Warn (but
    # keep going) so the user can tell why a middleware was not affected.
    known_slugs = REQUEST_MIDDLEWARE.keys() | RESPONSE_MIDDLEWARE.keys()
    for slug in sorted((flags.disable | flags.enable) - known_slugs):
        print(
            f"scurl: warning: unknown middleware slug '{slug}' "
            "(see --list-middleware)",
            file=sys.stderr,
        )

    if not curl_args:
        print("scurl: no URL specified", file=sys.stderr)
        print("Try 'scurl --help' for more information.", file=sys.stderr)
        return 1

    # Parse curl args to get request context
    context = parse_curl_args(curl_args)

    if not context.url:
        # Same two-line diagnostic as the empty-argument case above.
        print("scurl: no URL specified", file=sys.stderr)
        print("Try 'scurl --help' for more information.", file=sys.stderr)
        return 1

    # Build request middleware chain. SecretDefender is left out both when
    # it is disabled and when its block is explicitly overridden.
    request_chain = RequestMiddlewareChain()
    secret_defender_enabled = "secret-defender" not in flags.disable
    secret_defender_override = "secret-defender" in flags.enable

    if secret_defender_enabled and not secret_defender_override:
        request_chain.add(SecretDefender())

    # Execute request middleware
    result = request_chain.execute(context)
    if result.action == RequestAction.BLOCK:
        print(f"scurl: {result.reason}", file=sys.stderr)
        return 1

    # Use potentially modified context
    if result.context:
        context = result.context

    # Execute curl
    curl_result = execute_curl(context)

    if curl_result.return_code not in (0, -1):
        # curl itself failed (not the -1 timeout/not-found case handled
        # below): relay its stderr, if any, and propagate its exit code.
        if curl_result.stderr:
            print(curl_result.stderr, file=sys.stderr)
        return curl_result.return_code

    if curl_result.return_code == -1:
        print(f"scurl: {curl_result.stderr}", file=sys.stderr)
        return 1

    # Build response middleware chain; --raw leaves the chain empty.
    response_chain = ResponseMiddlewareChain()
    if not flags.raw and "trafilatura" not in flags.disable:
        response_chain.add(TrafilaturaExtractor())

    # Execute response middleware
    response_context = curl_result_to_response_context(curl_result)
    response_result = response_chain.execute(response_context)

    # Output result as raw bytes, making sure non-empty output ends with a
    # newline.
    body = response_result.body
    sys.stdout.buffer.write(body)
    if body and not body.endswith(b"\n"):
        sys.stdout.buffer.write(b"\n")

    return 0
186
+
187
+
188
def main() -> None:
    """Entry point: run scurl on ``sys.argv`` and exit with its return code."""
    exit_code = run()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()