sibylline-scurl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibylline_scurl-0.1.0/.github/workflows/ci.yml +46 -0
- sibylline_scurl-0.1.0/.github/workflows/publish.yml +71 -0
- sibylline_scurl-0.1.0/.gitignore +43 -0
- sibylline_scurl-0.1.0/PKG-INFO +81 -0
- sibylline_scurl-0.1.0/README.md +57 -0
- sibylline_scurl-0.1.0/pyproject.toml +44 -0
- sibylline_scurl-0.1.0/src/scurl/__init__.py +3 -0
- sibylline_scurl-0.1.0/src/scurl/cli.py +194 -0
- sibylline_scurl-0.1.0/src/scurl/curl.py +188 -0
- sibylline_scurl-0.1.0/src/scurl/middleware.py +173 -0
- sibylline_scurl-0.1.0/src/scurl/request_middleware.py +261 -0
- sibylline_scurl-0.1.0/src/scurl/response_middleware.py +95 -0
- sibylline_scurl-0.1.0/tests/__init__.py +1 -0
- sibylline_scurl-0.1.0/tests/test_cli.py +231 -0
- sibylline_scurl-0.1.0/tests/test_curl.py +149 -0
- sibylline_scurl-0.1.0/tests/test_middleware.py +305 -0
- sibylline_scurl-0.1.0/tests/test_response_middleware.py +235 -0
- sibylline_scurl-0.1.0/tests/test_secret_defender.py +308 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]" pytest-mock
|
|
28
|
+
|
|
29
|
+
- name: Run tests
|
|
30
|
+
run: pytest -v
|
|
31
|
+
|
|
32
|
+
lint:
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
|
|
37
|
+
- name: Set up Python
|
|
38
|
+
uses: actions/setup-python@v5
|
|
39
|
+
with:
|
|
40
|
+
python-version: "3.12"
|
|
41
|
+
|
|
42
|
+
- name: Install ruff
|
|
43
|
+
run: pip install ruff
|
|
44
|
+
|
|
45
|
+
- name: Run ruff
|
|
46
|
+
run: ruff check src/ tests/
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
build:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Set up Python
|
|
14
|
+
uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
|
|
18
|
+
- name: Install build dependencies
|
|
19
|
+
run: |
|
|
20
|
+
python -m pip install --upgrade pip
|
|
21
|
+
pip install build
|
|
22
|
+
|
|
23
|
+
- name: Build package
|
|
24
|
+
run: python -m build
|
|
25
|
+
|
|
26
|
+
- name: Upload artifacts
|
|
27
|
+
uses: actions/upload-artifact@v4
|
|
28
|
+
with:
|
|
29
|
+
name: dist
|
|
30
|
+
path: dist/
|
|
31
|
+
|
|
32
|
+
test:
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
needs: build
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v4
|
|
37
|
+
|
|
38
|
+
- name: Set up Python
|
|
39
|
+
uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: "3.12"
|
|
42
|
+
|
|
43
|
+
- name: Download artifacts
|
|
44
|
+
uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
- name: Install from wheel
|
|
50
|
+
run: pip install dist/*.whl
|
|
51
|
+
|
|
52
|
+
- name: Test CLI
|
|
53
|
+
run: |
|
|
54
|
+
scurl --help
|
|
55
|
+
scurl --list-middleware
|
|
56
|
+
|
|
57
|
+
publish-pypi:
|
|
58
|
+
runs-on: ubuntu-latest
|
|
59
|
+
needs: [build, test]
|
|
60
|
+
|
|
61
|
+
steps:
|
|
62
|
+
- name: Download artifacts
|
|
63
|
+
uses: actions/download-artifact@v4
|
|
64
|
+
with:
|
|
65
|
+
name: dist
|
|
66
|
+
path: dist/
|
|
67
|
+
|
|
68
|
+
- name: Publish to PyPI
|
|
69
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
70
|
+
with:
|
|
71
|
+
password: ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
ENV/
|
|
27
|
+
|
|
28
|
+
# Testing
|
|
29
|
+
.pytest_cache/
|
|
30
|
+
.coverage
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
|
|
35
|
+
# IDEs
|
|
36
|
+
.idea/
|
|
37
|
+
.vscode/
|
|
38
|
+
*.swp
|
|
39
|
+
*.swo
|
|
40
|
+
|
|
41
|
+
# OS
|
|
42
|
+
.DS_Store
|
|
43
|
+
Thumbs.db
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sibylline-scurl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A secure curl wrapper with middleware support and HTML-to-markdown extraction
|
|
5
|
+
Author: Nathan
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: curl,markdown,security,web-scraping
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Security
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Requires-Dist: trafilatura>=1.6.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# scurl
|
|
26
|
+
|
|
27
|
+
[![PyPI version](https://badge.fury.io/py/sibylline-scurl.svg)](https://badge.fury.io/py/sibylline-scurl)
|
|
28
|
+
[](https://github.com/yourusername/scurl/actions/workflows/ci.yml)
|
|
29
|
+
|
|
30
|
+
A secure curl wrapper with middleware support and HTML-to-markdown extraction.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install sibylline-scurl
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Or with [pipx](https://pipx.pypa.io/) (recommended for CLI tools):
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pipx install sibylline-scurl
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Fetch a URL and extract clean markdown from HTML
|
|
48
|
+
scurl https://example.com
|
|
49
|
+
|
|
50
|
+
# Raw output (disable response middleware)
|
|
51
|
+
scurl --raw https://example.com
|
|
52
|
+
|
|
53
|
+
# All curl flags work
|
|
54
|
+
scurl -H "Accept: application/json" https://api.example.com/data
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
- **SecretDefender**: Automatically detects and blocks requests containing exposed secrets/tokens
|
|
60
|
+
- **TrafilaturaExtractor**: Extracts clean markdown from HTML responses
|
|
61
|
+
- **Middleware System**: Composable request and response middleware
|
|
62
|
+
|
|
63
|
+
## Flags
|
|
64
|
+
|
|
65
|
+
| Flag | Description |
|
|
66
|
+
|------|-------------|
|
|
67
|
+
| `--raw` | Disable all response middleware |
|
|
68
|
+
| `--disable <slug>` | Disable a middleware by slug (can be repeated) |
|
|
69
|
+
| `--enable <slug>` | Override a middleware's block (can be repeated) |
|
|
70
|
+
| `--list-middleware` | List available middleware and their slugs |
|
|
71
|
+
|
|
72
|
+
## Middleware Slugs
|
|
73
|
+
|
|
74
|
+
| Slug | Type | Description |
|
|
75
|
+
|------|------|-------------|
|
|
76
|
+
| `secret-defender` | Request | Detects and blocks requests containing secrets |
|
|
77
|
+
| `trafilatura` | Response | Extracts clean markdown from HTML |
|
|
78
|
+
|
|
79
|
+
## License
|
|
80
|
+
|
|
81
|
+
MIT
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# scurl
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://badge.fury.io/py/sibylline-scurl.svg)](https://badge.fury.io/py/sibylline-scurl)
|
|
4
|
+
[](https://github.com/yourusername/scurl/actions/workflows/ci.yml)
|
|
5
|
+
|
|
6
|
+
A secure curl wrapper with middleware support and HTML-to-markdown extraction.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install sibylline-scurl
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Or with [pipx](https://pipx.pypa.io/) (recommended for CLI tools):
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pipx install sibylline-scurl
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Fetch a URL and extract clean markdown from HTML
|
|
24
|
+
scurl https://example.com
|
|
25
|
+
|
|
26
|
+
# Raw output (disable response middleware)
|
|
27
|
+
scurl --raw https://example.com
|
|
28
|
+
|
|
29
|
+
# All curl flags work
|
|
30
|
+
scurl -H "Accept: application/json" https://api.example.com/data
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- **SecretDefender**: Automatically detects and blocks requests containing exposed secrets/tokens
|
|
36
|
+
- **TrafilaturaExtractor**: Extracts clean markdown from HTML responses
|
|
37
|
+
- **Middleware System**: Composable request and response middleware
|
|
38
|
+
|
|
39
|
+
## Flags
|
|
40
|
+
|
|
41
|
+
| Flag | Description |
|
|
42
|
+
|------|-------------|
|
|
43
|
+
| `--raw` | Disable all response middleware |
|
|
44
|
+
| `--disable <slug>` | Disable a middleware by slug (can be repeated) |
|
|
45
|
+
| `--enable <slug>` | Override a middleware's block (can be repeated) |
|
|
46
|
+
| `--list-middleware` | List available middleware and their slugs |
|
|
47
|
+
|
|
48
|
+
## Middleware Slugs
|
|
49
|
+
|
|
50
|
+
| Slug | Type | Description |
|
|
51
|
+
|------|------|-------------|
|
|
52
|
+
| `secret-defender` | Request | Detects and blocks requests containing secrets |
|
|
53
|
+
| `trafilatura` | Response | Extracts clean markdown from HTML |
|
|
54
|
+
|
|
55
|
+
## License
|
|
56
|
+
|
|
57
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sibylline-scurl"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A secure curl wrapper with middleware support and HTML-to-markdown extraction"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Nathan" }]
|
|
9
|
+
keywords = ["curl", "web-scraping", "security", "markdown"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"Environment :: Console",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.10",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
20
|
+
"Topic :: Security",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"trafilatura>=1.6.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=7.0.0",
|
|
29
|
+
"pytest-cov>=4.0.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
scurl = "scurl.cli:main"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/scurl"]
|
|
41
|
+
|
|
42
|
+
[tool.pytest.ini_options]
|
|
43
|
+
testpaths = ["tests"]
|
|
44
|
+
pythonpath = ["src"]
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""CLI entry point for scurl."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from .middleware import (
|
|
8
|
+
RequestMiddlewareChain,
|
|
9
|
+
ResponseMiddlewareChain,
|
|
10
|
+
RequestAction,
|
|
11
|
+
)
|
|
12
|
+
from .request_middleware import SecretDefender
|
|
13
|
+
from .response_middleware import TrafilaturaExtractor
|
|
14
|
+
from .curl import parse_curl_args, execute_curl, curl_result_to_response_context
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Registry of available middleware with their slugs.
# Keys are the slugs users pass to --disable / --enable; each value is a
# (display name, description, middleware class) tuple. The first two items
# are what print_middleware_list() shows next to the slug.
REQUEST_MIDDLEWARE = {
    "secret-defender": ("SecretDefender", "Detects and blocks requests containing secrets", SecretDefender),
}

# Response-side registry; same (display name, description, class) layout.
RESPONSE_MIDDLEWARE = {
    "trafilatura": ("TrafilaturaExtractor", "Extracts clean markdown from HTML", TrafilaturaExtractor),
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def print_middleware_list() -> None:
    """Write both middleware registries to stdout.

    The request registry is listed first, then a blank line, then the
    response registry. Every entry is one line: the slug left-aligned in a
    20-character column, followed by the display name and description.
    """
    sections = (
        ("Request Middleware:", REQUEST_MIDDLEWARE),
        ("Response Middleware:", RESPONSE_MIDDLEWARE),
    )
    for index, (heading, registry) in enumerate(sections):
        if index:
            # Blank separator between sections, none before the first.
            print()
        print(heading)
        for slug, entry in registry.items():
            # The third tuple item (the middleware class) is not displayed.
            name, desc, _ = entry
            print(f" {slug:<20} {name} - {desc}")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ScurlFlags:
    """Parsed scurl-specific flags.

    Produced by extract_scurl_flags(); everything not captured here is
    forwarded to curl untouched.
    """
    # True when --raw was given; run() then adds no response middleware.
    raw: bool = False
    # Slugs collected from each "--disable <slug>" occurrence.
    disable: set[str] = field(default_factory=set)
    # Slugs collected from each "--enable <slug>" occurrence.
    enable: set[str] = field(default_factory=set)
    # True when --list-middleware was given.
    list_middleware: bool = False
    # True when --help / -h was the very first argument.
    help: bool = False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def extract_scurl_flags(args: list[str]) -> tuple[ScurlFlags, list[str]]:
    """Separate scurl's own flags from the arguments meant for curl.

    Args:
        args: Command-line arguments, without the program name.

    Returns:
        A ``(flags, remaining_args)`` pair. ``remaining_args`` keeps the
        original order of every argument scurl did not consume.

    Notes:
        * ``--disable`` / ``--enable`` consume the following argument as a
          slug. When nothing follows, the bare flag is left for curl.
        * ``--help`` / ``-h`` count as scurl help only in first position;
          anywhere else they are passed through to curl.
    """
    flags = ScurlFlags()
    passthrough: list[str] = []
    # Value-taking flags mapped to the set that collects their slugs.
    slug_targets = {"--disable": flags.disable, "--enable": flags.enable}

    pending = iter(enumerate(args))
    for position, token in pending:
        if token == "--raw":
            flags.raw = True
        elif token in slug_targets:
            following = next(pending, None)
            if following is None:
                # Flag was the last argument: no slug to read, hand it to curl.
                passthrough.append(token)
            else:
                slug_targets[token].add(following[1])
        elif token == "--list-middleware":
            flags.list_middleware = True
        elif token in ("--help", "-h") and position == 0:
            # Only treat as scurl help if it's the first arg
            flags.help = True
        else:
            passthrough.append(token)

    return flags, passthrough
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def print_help() -> None:
    """Print the scurl usage summary, option list and examples to stdout."""
    help_lines = (
        "scurl - A secure curl wrapper with middleware support",
        "",
        "Usage: scurl [scurl-options] [curl-options] <url>",
        "",
        "scurl-specific options:",
        " --raw Disable all response middleware (raw curl output)",
        " --disable <middleware> Disable a middleware by slug (can be repeated)",
        " --enable <middleware> Override a middleware's block (can be repeated)",
        " --list-middleware List available middleware and their slugs",
        " --help, -h Show this help (use curl --help for curl options)",
        "",
        "All other options are passed directly to curl.",
        "",
        "Examples:",
        " scurl https://example.com # Fetch and extract markdown",
        " scurl --raw https://example.com # Raw HTML output",
        " scurl --disable trafilatura https://example.com # Disable markdown extraction",
        " scurl --disable secret-defender https://... # Disable secret scanning",
        " scurl --enable secret-defender https://... # Override a secret block",
        " scurl -H 'Accept: application/json' https://api.example.com/data",
    )
    # One print per line; an empty string yields the same blank line as print().
    for text in help_lines:
        print(text)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def run(args: Optional[list[str]] = None) -> int:
    """Execute one scurl invocation and return its exit code.

    Pipeline: split off scurl flags, parse the rest as a curl request, run
    the request middleware, invoke curl, run the response middleware, and
    write the resulting body to stdout.

    Args:
        args: Arguments without the program name; ``sys.argv[1:]`` when None.

    Returns:
        0 on success (and for --help / --list-middleware); 1 when no URL is
        given, a request middleware blocks the request, or curl's result
        carries return code -1; otherwise curl's own non-zero return code.
    """
    argv = sys.argv[1:] if args is None else args

    # Extract scurl-specific flags
    flags, curl_args = extract_scurl_flags(argv)

    # Informational modes short-circuit before any request is built.
    if flags.help:
        print_help()
        return 0
    if flags.list_middleware:
        print_middleware_list()
        return 0

    if not curl_args:
        print("scurl: no URL specified", file=sys.stderr)
        print("Try 'scurl --help' for more information.", file=sys.stderr)
        return 1

    # Parse curl args to get request context
    context = parse_curl_args(curl_args)
    if not context.url:
        print("scurl: no URL specified", file=sys.stderr)
        return 1

    # Build request middleware chain. The secret scanner is left out both
    # when it is disabled and when its block is overridden via --enable.
    request_chain = RequestMiddlewareChain()
    skip_secret_defender = (
        "secret-defender" in flags.disable or "secret-defender" in flags.enable
    )
    if not skip_secret_defender:
        request_chain.add(SecretDefender())

    # Execute request middleware
    verdict = request_chain.execute(context)
    if verdict.action == RequestAction.BLOCK:
        print(f"scurl: {verdict.reason}", file=sys.stderr)
        return 1

    # Use potentially modified context
    context = verdict.context or context

    # Execute curl
    curl_result = execute_curl(context)
    exit_code = curl_result.return_code

    if exit_code == -1:
        # -1 is scurl's own failure marker (timeout / curl not found, per
        # the original comment), reported with the scurl prefix.
        print(f"scurl: {curl_result.stderr}", file=sys.stderr)
        return 1
    if exit_code != 0:
        # curl itself failed: relay its stderr and exit status unchanged.
        if curl_result.stderr:
            print(curl_result.stderr, file=sys.stderr)
        return exit_code

    # Build response middleware chain
    response_chain = ResponseMiddlewareChain()
    if not flags.raw and "trafilatura" not in flags.disable:
        response_chain.add(TrafilaturaExtractor())

    # Execute response middleware
    response_context = curl_result_to_response_context(curl_result)
    body = response_chain.execute(response_context).body

    # Output result as bytes, ending non-empty output with a newline.
    stream = sys.stdout.buffer
    stream.write(body)
    if body and not body.endswith(b"\n"):
        stream.write(b"\n")

    return 0
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main() -> None:
    """Console-script entry point: run scurl and exit with its status code."""
    exit_code = run()
    sys.exit(exit_code)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Allow direct execution of this module in addition to the installed
# "scurl" console script.
if __name__ == "__main__":
    main()
|