klyrek-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klyrek_core-0.1.0/.gitignore +20 -0
- klyrek_core-0.1.0/PKG-INFO +37 -0
- klyrek_core-0.1.0/README.md +18 -0
- klyrek_core-0.1.0/pyproject.toml +27 -0
- klyrek_core-0.1.0/src/klyrek_core/__init__.py +31 -0
- klyrek_core-0.1.0/src/klyrek_core/config.py +54 -0
- klyrek_core-0.1.0/src/klyrek_core/exceptions.py +26 -0
- klyrek_core-0.1.0/src/klyrek_core/logging.py +39 -0
- klyrek_core-0.1.0/src/klyrek_core/models.py +105 -0
- klyrek_core-0.1.0/src/klyrek_core/plugins.py +77 -0
- klyrek_core-0.1.0/src/klyrek_core/py.typed +0 -0
- klyrek_core-0.1.0/src/klyrek_core/scope.py +50 -0
- klyrek_core-0.1.0/tests/test_config.py +28 -0
- klyrek_core-0.1.0/tests/test_logging.py +17 -0
- klyrek_core-0.1.0/tests/test_models.py +37 -0
- klyrek_core-0.1.0/tests/test_plugins.py +55 -0
- klyrek_core-0.1.0/tests/test_scope.py +42 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*.egg-info/
|
|
4
|
+
.eggs/
|
|
5
|
+
.qodo/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
.env
|
|
11
|
+
.pytest_cache/
|
|
12
|
+
.mypy_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.coverage
|
|
15
|
+
htmlcov/
|
|
16
|
+
*.log
|
|
17
|
+
.idea/
|
|
18
|
+
.vscode/
|
|
19
|
+
!.vscode/extensions.json
|
|
20
|
+
klyrek_output/
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: klyrek-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared utilities, configuration, logging, plugins, and common models for the Klyrek ecosystem
|
|
5
|
+
Author: Klyrek Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: appsec,pentesting,reconnaissance,security
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Information Technology
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Security
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# klyrek-core
|
|
21
|
+
|
|
22
|
+
Shared foundation for the Klyrek ecosystem: configuration, logging, the plugin registry, common
|
|
23
|
+
domain models, and the authorization-scope guard that every other Klyrek package builds on.
|
|
24
|
+
|
|
25
|
+
## Scope enforcement
|
|
26
|
+
|
|
27
|
+
Klyrek modules are expected to refuse to act against a target that hasn't been explicitly
|
|
28
|
+
declared in scope. `klyrek_core.scope` provides `AuthorizationScope`, which downstream packages
|
|
29
|
+
call before making any network request:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from klyrek_core.scope import AuthorizationScope
|
|
33
|
+
|
|
34
|
+
scope = AuthorizationScope(authorized_hosts=["target.com", "*.target.com"])
|
|
35
|
+
scope.check("https://target.com/login") # OK
|
|
36
|
+
scope.check("https://evil.com") # raises ScopeViolationError
|
|
37
|
+
```
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# klyrek-core
|
|
2
|
+
|
|
3
|
+
Shared foundation for the Klyrek ecosystem: configuration, logging, the plugin registry, common
|
|
4
|
+
domain models, and the authorization-scope guard that every other Klyrek package builds on.
|
|
5
|
+
|
|
6
|
+
## Scope enforcement
|
|
7
|
+
|
|
8
|
+
Klyrek modules are expected to refuse to act against a target that hasn't been explicitly
|
|
9
|
+
declared in scope. `klyrek_core.scope` provides `AuthorizationScope`, which downstream packages
|
|
10
|
+
call before making any network request:
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
from klyrek_core.scope import AuthorizationScope
|
|
14
|
+
|
|
15
|
+
scope = AuthorizationScope(authorized_hosts=["target.com", "*.target.com"])
|
|
16
|
+
scope.check("https://target.com/login") # OK
|
|
17
|
+
scope.check("https://evil.com") # raises ScopeViolationError
|
|
18
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "klyrek-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Shared utilities, configuration, logging, plugins, and common models for the Klyrek ecosystem"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Klyrek Contributors" }]
|
|
13
|
+
keywords = ["security", "reconnaissance", "appsec", "pentesting"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Information Technology",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Security",
|
|
20
|
+
]
|
|
21
|
+
dependencies = []
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
dev = ["pytest>=8.0", "ruff>=0.6", "mypy>=1.10"]
|
|
25
|
+
|
|
26
|
+
[tool.hatch.build.targets.wheel]
|
|
27
|
+
packages = ["src/klyrek_core"]
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""klyrek-core: shared config, logging, plugins, and models for the Klyrek ecosystem."""
|
|
2
|
+
|
|
3
|
+
from klyrek_core.config import KlyrekConfig
|
|
4
|
+
from klyrek_core.exceptions import ConfigError, KlyrekError, PluginError, ScopeViolationError
|
|
5
|
+
from klyrek_core.logging import get_logger
|
|
6
|
+
from klyrek_core.models import Endpoint, Finding, ScanResult, Severity, Target, Technology
|
|
7
|
+
from klyrek_core.plugins import Plugin, PluginRegistry, register_plugin, registry
|
|
8
|
+
from klyrek_core.scope import AuthorizationScope
|
|
9
|
+
|
|
10
|
+
# Version string of the package; matches the version declared in pyproject.toml.
__version__ = "0.1.0"

# Names re-exported from the submodules as the package's public surface.
__all__ = [
    "AuthorizationScope",
    "ConfigError",
    "Endpoint",
    "Finding",
    "KlyrekConfig",
    "KlyrekError",
    "Plugin",
    "PluginError",
    "PluginRegistry",
    "ScanResult",
    "ScopeViolationError",
    "Severity",
    "Target",
    "Technology",
    "__version__",
    "get_logger",
    "register_plugin",
    "registry",
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Runtime configuration shared across Klyrek packages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, fields
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, get_type_hints
|
|
9
|
+
|
|
10
|
+
from klyrek_core.exceptions import ConfigError
|
|
11
|
+
|
|
12
|
+
# Default prefix of the environment variables read by KlyrekConfig.from_env().
ENV_PREFIX = "KLYREK_"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
class KlyrekConfig:
    """Non-secret defaults that every Klyrek package reads.

    Packages such as klyrek-http or klyrek-crawler may stack their own
    configuration on top, but request pacing, identification, and the output
    location are kept here so the whole ecosystem behaves consistently.

    Each field can be overridden by ``from_env`` through an environment
    variable named ``<prefix><FIELD_NAME_IN_UPPER_CASE>``.
    """

    output_dir: Path = Path("klyrek_output")
    user_agent: str = "Klyrek/0.1 (+https://github.com/klyrek/klyrek)"
    timeout_seconds: float = 15.0
    rate_limit_per_host: float = 5.0
    max_concurrency: int = 10
    respect_robots_txt: bool = True
    verify_tls: bool = True

    @classmethod
    def from_env(cls, prefix: str = ENV_PREFIX) -> KlyrekConfig:
        """Create a config from ``KLYREK_*`` environment variables.

        Args:
            prefix: Text prepended to the upper-cased field name to form the
                environment variable name.

        Returns:
            A config in which every field with a set environment variable
            carries the converted value and all other fields keep their defaults.

        Raises:
            ConfigError: Propagated from ``_coerce`` when a value cannot be
                converted to the field's declared type.
        """
        declared_types = get_type_hints(cls)
        from_environment: dict[str, Any] = {}
        for spec in fields(cls):
            variable = f"{prefix}{spec.name.upper()}"
            # Unset variables are skipped so the dataclass default applies.
            if (text := os.environ.get(variable)) is not None:
                from_environment[spec.name] = _coerce(text, declared_types[spec.name], variable)
        return cls(**from_environment)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _coerce(raw: str, type_: type, env_name: str) -> Any:
|
|
47
|
+
try:
|
|
48
|
+
if type_ is bool:
|
|
49
|
+
return raw.strip().lower() in {"1", "true", "yes", "on"}
|
|
50
|
+
if type_ is Path:
|
|
51
|
+
return Path(raw)
|
|
52
|
+
return type_(raw)
|
|
53
|
+
except ValueError as exc:
|
|
54
|
+
raise ConfigError(f"Invalid value for {env_name}: {raw!r}") from exc
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Exception hierarchy shared across all Klyrek packages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class KlyrekError(Exception):
    """Common ancestor of every exception defined by Klyrek."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ConfigError(KlyrekError):
    """Signals configuration that is absent or cannot be used."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ScopeViolationError(KlyrekError):
    """Signals that an action was aimed at a host outside the declared scope.

    Attributes:
        host: The offending host, exactly as passed to the constructor.
    """

    def __init__(self, host: str) -> None:
        message = (
            f"'{host}' is not an authorized target. Add it to the scan's "
            "AuthorizationScope before targeting it."
        )
        self.host = host
        super().__init__(message)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PluginError(KlyrekError):
    """Signals a failure while registering or looking up a plugin."""
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Logging setup shared across Klyrek packages.
|
|
2
|
+
|
|
3
|
+
Every package should call ``get_logger(__name__)`` rather than configuring its own
|
|
4
|
+
handlers, so a scan that pulls in crawler + api + tech + report modules produces one
|
|
5
|
+
consistent, readable log stream instead of each package fighting over root config.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
# Flipped to True by configure_logging() once the shared handler is attached.
_CONFIGURED = False
|
|
13
|
+
# Name of the logger at the root of the shared Klyrek logger tree.
_BASE_LOGGER_NAME = "klyrek"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def configure_logging(level: int | str = logging.INFO) -> None:
    """Set up the shared 'klyrek' logger tree; repeated calls are safe.

    The level is applied on every call. The stream handler and its formatter
    are attached, and propagation is switched off, only on the first call.

    Args:
        level: Level given to the base logger, as accepted by
            ``logging.Logger.setLevel``.
    """
    global _CONFIGURED
    base = logging.getLogger(_BASE_LOGGER_NAME)
    base.setLevel(level)
    if _CONFIGURED:
        # The handler is already in place; adding another would duplicate output.
        return

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    stream = logging.StreamHandler()
    stream.setFormatter(formatter)
    base.addHandler(stream)
    base.propagate = False
    _CONFIGURED = True
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_logger(name: str) -> logging.Logger:
    """Return a logger under the shared 'klyrek' namespace, e.g. 'klyrek.crawler.pages'.

    Shared logging is configured on first use. ``name`` is kept unchanged only
    when it is exactly 'klyrek' or a dotted child of it ('klyrek.something').
    Every other name is prefixed with 'klyrek.', including module names that
    merely begin with those letters, such as ``klyrek_core.config`` passed as
    ``__name__``. Without the dot boundary such loggers would sit outside the
    'klyrek' tree and never reach the shared handler.

    Args:
        name: Requested logger name; an empty string selects the base logger.

    Returns:
        The logger for the qualified name.
    """
    if not _CONFIGURED:
        configure_logging()
    if not name:
        # Avoid creating a logger called 'klyrek.' with an empty last component.
        return logging.getLogger(_BASE_LOGGER_NAME)
    in_tree = name == _BASE_LOGGER_NAME or name.startswith(f"{_BASE_LOGGER_NAME}.")
    qualified = name if in_tree else f"{_BASE_LOGGER_NAME}.{name}"
    return logging.getLogger(qualified)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Common domain models shared across Klyrek packages.
|
|
2
|
+
|
|
3
|
+
These are the shapes every module (crawler, api, tech, auth, js, assets, headers,
|
|
4
|
+
report, ...) reads and writes, so a scan built from multiple packages composes into
|
|
5
|
+
one coherent application map instead of a pile of unrelated outputs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import uuid
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from enum import Enum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _now() -> datetime:
|
|
17
|
+
return datetime.now(timezone.utc)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _new_id() -> str:
|
|
21
|
+
return uuid.uuid4().hex
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Severity(str, Enum):
    """Severity levels for a finding.

    Members are also ``str`` instances, so they compare equal to their
    lower-case string values.
    """

    INFO = "info"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(slots=True)
class Target:
    """The authorized root of a scan.

    Only ``base_url`` is required. ``id`` and ``created_at`` are generated
    per instance when not supplied.
    """

    base_url: str
    # Random hex identifier produced by _new_id().
    id: str = field(default_factory=_new_id)
    # Host strings associated with the target; a fresh empty list by default.
    hosts: list[str] = field(default_factory=list)
    notes: str | None = None
    # UTC timestamp produced by _now() at construction time.
    created_at: datetime = field(default_factory=_now)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(slots=True)
|
|
44
|
+
class Technology:
|
|
45
|
+
"""A fingerprinted technology observed on the target (framework, server, CMS, ...)."""
|
|
46
|
+
|
|
47
|
+
name: str
|
|
48
|
+
category: str
|
|
49
|
+
version: str | None = None
|
|
50
|
+
confidence: float = 1.0
|
|
51
|
+
evidence: list[str] = field(default_factory=list)
|
|
52
|
+
discovered_by: str | None = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(slots=True)
class Endpoint:
    """A page, form action, or API route found during a scan.

    Only ``url`` is required. ``id`` and ``discovered_at`` are generated per
    instance when not supplied.
    """

    url: str
    method: str = "GET"
    # Random hex identifier produced by _new_id().
    id: str = field(default_factory=_new_id)
    source: str = "crawler"
    status_code: int | None = None
    content_type: str | None = None
    # Parameter names seen for this endpoint; a fresh empty list per instance.
    params: list[str] = field(default_factory=list)
    # Tri-state: None when not specified.
    requires_auth: bool | None = None
    discovered_by: str | None = None
    # UTC timestamp produced by _now() at construction time.
    discovered_at: datetime = field(default_factory=_now)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(slots=True)
class Finding:
    """An observation raised for human review, not a confirmed vulnerability.

    ``title`` and ``severity`` are required. ``id`` and ``created_at`` are
    generated per instance when not supplied.
    """

    title: str
    severity: Severity
    # Random hex identifier produced by _new_id().
    id: str = field(default_factory=_new_id)
    description: str = ""
    # Endpoint the observation relates to, when there is one.
    endpoint: Endpoint | None = None
    # Supporting evidence strings; a fresh empty list per instance.
    evidence: list[str] = field(default_factory=list)
    module: str | None = None
    # UTC timestamp produced by _now() at construction time.
    created_at: datetime = field(default_factory=_now)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(slots=True)
class ScanResult:
    """Aggregate output of one scan: everything Klyrek learned about a target.

    Only ``target`` is required. The three collections start empty and are
    filled through the ``add_*`` methods.
    """

    target: Target
    # Random hex identifier produced by _new_id().
    id: str = field(default_factory=_new_id)
    endpoints: list[Endpoint] = field(default_factory=list)
    technologies: list[Technology] = field(default_factory=list)
    findings: list[Finding] = field(default_factory=list)
    # UTC timestamp produced by _now() at construction time.
    started_at: datetime = field(default_factory=_now)
    # None until a caller sets it; nothing in this class assigns it.
    finished_at: datetime | None = None
    metadata: dict[str, str] = field(default_factory=dict)

    def add_endpoint(self, endpoint: Endpoint) -> None:
        """Append ``endpoint`` to ``endpoints``; no de-duplication is done."""
        self.endpoints.append(endpoint)

    def add_technology(self, technology: Technology) -> None:
        """Append ``technology`` to ``technologies``; no de-duplication is done."""
        self.technologies.append(technology)

    def add_finding(self, finding: Finding) -> None:
        """Append ``finding`` to ``findings``; no de-duplication is done."""
        self.findings.append(finding)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Plugin architecture shared across Klyrek packages.
|
|
2
|
+
|
|
3
|
+
Downstream packages (klyrek-tech, klyrek-osint, ...) register capabilities as
|
|
4
|
+
plugins under a category (e.g. "tech-fingerprint", "osint-source") rather than
|
|
5
|
+
being hard-wired into the core, so third parties can extend Klyrek without
|
|
6
|
+
forking it.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
from typing import Any, ClassVar
|
|
14
|
+
|
|
15
|
+
from klyrek_core.exceptions import PluginError
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Plugin(ABC):
    """Abstract base for anything that can be plugged into a Klyrek scan.

    Concrete plugins declare the ``name`` and ``category`` class attributes
    and implement ``run``.
    """

    # Identifier of the plugin within its category.
    name: ClassVar[str]
    # Category the plugin is grouped under, e.g. "tech-fingerprint".
    category: ClassVar[str]

    @abstractmethod
    def run(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the plugin's capability."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PluginRegistry:
    """Holds registered plugin classes, keyed by category and then by name."""

    def __init__(self) -> None:
        # category -> {plugin name -> plugin class}; buckets appear on first registration.
        self._plugins: dict[str, dict[str, type[Plugin]]] = defaultdict(dict)

    def register(self, plugin_cls: type[Plugin]) -> type[Plugin]:
        """Add ``plugin_cls`` to the registry and return it unchanged.

        Returning the class lets this method serve as a class decorator.

        Raises:
            PluginError: If the class lacks a non-empty ``name`` or
                ``category``, or the name is already taken in that category.
        """
        if not getattr(plugin_cls, "name", None) or not getattr(plugin_cls, "category", None):
            raise PluginError(
                f"{plugin_cls.__name__} must define both 'name' and 'category' class attributes"
            )
        bucket = self._plugins[plugin_cls.category]
        if plugin_cls.name in bucket:
            raise PluginError(
                f"Plugin '{plugin_cls.name}' is already registered under category "
                f"'{plugin_cls.category}'"
            )
        bucket[plugin_cls.name] = plugin_cls
        return plugin_cls

    def get(self, category: str, name: str) -> type[Plugin]:
        """Return the plugin class registered as ``name`` under ``category``.

        Raises:
            PluginError: If no such plugin is registered.
        """
        try:
            # dict.get() instead of indexing: indexing the defaultdict would
            # insert an empty bucket for every unknown category looked up.
            return self._plugins.get(category, {})[name]
        except KeyError as exc:
            raise PluginError(f"No plugin '{name}' registered under category '{category}'") from exc

    def list(self, category: str | None = None) -> list[type[Plugin]]:
        """Return the registered plugin classes.

        Args:
            category: When given, only that category's plugins are returned
                (an empty list if the category is unknown); otherwise all
                plugins across every category.
        """
        if category is not None:
            return list(self._plugins.get(category, {}).values())
        return [plugin for bucket in self._plugins.values() for plugin in bucket.values()]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
#: Default registry shared by the whole process. Code that needs isolation
#: (tests, for example) can create its own ``PluginRegistry`` instead.
registry = PluginRegistry()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def register_plugin(plugin_cls: type[Plugin]) -> type[Plugin]:
    """Register ``plugin_cls`` on the default registry; usable as a class decorator.

    The class is returned unchanged, so decorating a class leaves its
    definition intact.

    Example:
        @register_plugin
        class WordPressFingerprint(Plugin):
            name = "wordpress"
            category = "tech-fingerprint"

            def run(self, response): ...
    """
    registered = registry.register(plugin_cls)
    return registered
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Authorization scope enforcement.
|
|
2
|
+
|
|
3
|
+
Every Klyrek package that makes a network request is expected to call
|
|
4
|
+
``AuthorizationScope.check()`` (or ``is_authorized()``) first. This is the mechanism
|
|
5
|
+
behind Klyrek's "authorized assessments only" principle: a scan can only ever touch
|
|
6
|
+
hosts the operator has explicitly declared, so a typo'd target, a redirect to a
|
|
7
|
+
third-party domain, or a misconfigured crawl can't silently reach out-of-scope
|
|
8
|
+
infrastructure.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from fnmatch import fnmatch
|
|
15
|
+
from urllib.parse import urlsplit
|
|
16
|
+
|
|
17
|
+
from klyrek_core.exceptions import ScopeViolationError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _hostname(url_or_host: str) -> str:
|
|
21
|
+
"""Extract a bare hostname from a URL or a already-bare host string."""
|
|
22
|
+
if "//" in url_or_host:
|
|
23
|
+
host = urlsplit(url_or_host).hostname
|
|
24
|
+
else:
|
|
25
|
+
host = urlsplit(f"//{url_or_host}").hostname
|
|
26
|
+
if not host:
|
|
27
|
+
raise ValueError(f"Could not determine a hostname from '{url_or_host}'")
|
|
28
|
+
return host.lower()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(slots=True)
class AuthorizationScope:
    """The set of hosts a scan is authorized to touch.

    Patterns support ``fnmatch``-style wildcards, e.g. ``*.target.com``.
    ``*`` also matches dots, so ``*.target.com`` covers ``a.b.target.com``,
    but it does not cover the bare ``target.com``; list that separately.
    Matching is case-insensitive: hosts are lower-cased before comparison,
    and so are patterns, whether they were passed to the constructor or
    added through ``add``.
    """

    authorized_hosts: list[str] = field(default_factory=list)

    def add(self, host_pattern: str) -> None:
        """Append ``host_pattern`` (stored lower-cased) to the scope."""
        self.authorized_hosts.append(host_pattern.lower())

    def is_authorized(self, url_or_host: str) -> bool:
        """Return True if the host of ``url_or_host`` matches any pattern.

        Raises:
            ValueError: Propagated from ``_hostname`` when no hostname can be
                extracted from the input.
        """
        host = _hostname(url_or_host)
        # Patterns are lower-cased here because constructor-supplied ones
        # bypass add(); the host is already lower case, so the outcome no
        # longer depends on the platform's fnmatch case rules.
        return any(fnmatch(host, pattern.lower()) for pattern in self.authorized_hosts)

    def check(self, url_or_host: str) -> None:
        """Raise ScopeViolationError if the given URL/host is not in scope."""
        if not self.is_authorized(url_or_host):
            raise ScopeViolationError(_hostname(url_or_host))
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from klyrek_core.config import KlyrekConfig
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_defaults():
    """A config built without arguments carries the built-in defaults."""
    cfg = KlyrekConfig()
    expected_dir = Path("klyrek_output")
    assert cfg.output_dir == expected_dir
    assert cfg.timeout_seconds == 15.0
    assert cfg.verify_tls is True
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_from_env_overrides(monkeypatch):
    """Values set in the environment replace defaults and are type-converted."""
    for variable, text in (
        ("KLYREK_TIMEOUT_SECONDS", "30"),
        ("KLYREK_VERIFY_TLS", "false"),
        ("KLYREK_OUTPUT_DIR", "/tmp/scan"),
    ):
        monkeypatch.setenv(variable, text)

    cfg = KlyrekConfig.from_env()

    assert cfg.timeout_seconds == 30.0
    assert cfg.verify_tls is False
    assert cfg.output_dir == Path("/tmp/scan")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_from_env_ignores_unset_vars(monkeypatch):
    """A field whose environment variable is unset keeps its default."""
    monkeypatch.delenv("KLYREK_RATE_LIMIT_PER_HOST", raising=False)
    cfg = KlyrekConfig.from_env()
    assert cfg.rate_limit_per_host == 5.0
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from klyrek_core.logging import get_logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_get_logger_namespaced():
    """A plain name is placed under the 'klyrek' namespace."""
    pages_logger = get_logger("crawler.pages")
    assert pages_logger.name == "klyrek.crawler.pages"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_get_logger_idempotent_handlers():
    """Asking for a second logger must not attach another handler to the base logger."""
    get_logger("a")
    base_logger = logging.getLogger("klyrek")
    handlers_before = len(base_logger.handlers)

    get_logger("b")
    handlers_after = len(base_logger.handlers)
    assert handlers_after == handlers_before
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from klyrek_core.models import Endpoint, Finding, ScanResult, Severity, Target, Technology
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_scan_result_aggregates():
    """Items added through the add_* methods show up in the matching collections."""
    root = Target(base_url="https://target.com", hosts=["target.com"])
    scan = ScanResult(target=root)

    users_route = Endpoint(url="https://target.com/api/users", method="GET", source="api")
    scan.add_endpoint(users_route)

    web_server = Technology(name="nginx", category="web-server", version="1.25")
    scan.add_technology(web_server)

    header_finding = Finding(
        title="Missing security headers",
        severity=Severity.LOW,
        endpoint=users_route,
        module="klyrek-headers",
    )
    scan.add_finding(header_finding)

    assert scan.target is root
    assert scan.endpoints == [users_route]
    assert scan.technologies == [web_server]
    assert scan.findings == [header_finding]
    assert scan.findings[0].endpoint.url == "https://target.com/api/users"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_ids_are_unique():
    """Two separately built endpoints receive different generated ids."""
    first = Endpoint(url="https://target.com/a")
    second = Endpoint(url="https://target.com/b")
    assert first.id != second.id
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_severity_is_str_enum():
    """Severity members equal their string values and can be looked up by value."""
    assert Severity.CRITICAL == "critical"
    looked_up = Severity("high")
    assert looked_up is Severity.HIGH
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from klyrek_core.exceptions import PluginError
|
|
4
|
+
from klyrek_core.plugins import Plugin, PluginRegistry
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DummyFingerprint(Plugin):
    """Minimal concrete plugin used as a fixture by the registry tests."""

    name = "dummy"
    category = "tech-fingerprint"

    def run(self, response):
        """Ignore ``response`` and report a fixed match marker."""
        return "matched"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_register_and_get():
    """A registered class can be fetched by category and name, then run."""
    reg = PluginRegistry()
    reg.register(DummyFingerprint)

    fetched = reg.get("tech-fingerprint", "dummy")
    assert fetched is DummyFingerprint
    instance = fetched()
    assert instance.run(response=None) == "matched"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_list_by_category():
    """list() filters by category and returns everything when given none."""
    reg = PluginRegistry()
    reg.register(DummyFingerprint)
    only_dummy = [DummyFingerprint]
    assert reg.list("tech-fingerprint") == only_dummy
    assert reg.list("other-category") == []
    assert reg.list() == only_dummy
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_get_missing_plugin_raises():
    """Looking up a plugin that was never registered raises PluginError."""
    empty_registry = PluginRegistry()
    with pytest.raises(PluginError):
        empty_registry.get("tech-fingerprint", "nonexistent")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_duplicate_registration_raises():
    """Registering the same name twice in one category raises PluginError."""
    reg = PluginRegistry()
    reg.register(DummyFingerprint)
    with pytest.raises(PluginError):
        reg.register(DummyFingerprint)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_missing_name_or_category_raises():
    """A plugin class with an empty category is refused at registration."""

    class Broken(Plugin):
        name = "broken"
        category = ""

        def run(self):
            return None

    reg = PluginRegistry()
    with pytest.raises(PluginError):
        reg.register(Broken)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from klyrek_core.exceptions import ScopeViolationError
|
|
4
|
+
from klyrek_core.scope import AuthorizationScope
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_exact_host_authorized():
    """A URL on an exactly listed host is in scope."""
    guard = AuthorizationScope(authorized_hosts=["target.com"])
    assert guard.is_authorized("https://target.com/login")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_unauthorized_host_rejected():
    """A URL on a host that is not listed is out of scope."""
    guard = AuthorizationScope(authorized_hosts=["target.com"])
    assert not guard.is_authorized("https://evil.com")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_wildcard_subdomain():
    """'*.target.com' covers subdomains but not the bare domain itself."""
    guard = AuthorizationScope(authorized_hosts=["*.target.com"])
    assert guard.is_authorized("https://api.target.com/v1")
    assert not guard.is_authorized("https://target.com")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_check_raises_on_violation():
    """check() raises ScopeViolationError for an out-of-scope URL."""
    guard = AuthorizationScope(authorized_hosts=["target.com"])
    with pytest.raises(ScopeViolationError):
        guard.check("https://evil.com/steal")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_check_passes_silently_when_authorized():
    """check() returns without raising for an in-scope URL."""
    guard = AuthorizationScope(authorized_hosts=["target.com"])
    guard.check("https://target.com")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_bare_host_input():
    """A host given without scheme or path is accepted as input."""
    guard = AuthorizationScope(authorized_hosts=["target.com"])
    assert guard.is_authorized("target.com")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_add_pattern():
    """A pattern added in mixed case still matches the lower-case host."""
    guard = AuthorizationScope()
    guard.add("Target.com")
    assert guard.is_authorized("https://target.com")
|