klyrek-tech 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ .qodo/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+ .env
11
+ .pytest_cache/
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .coverage
15
+ htmlcov/
16
+ *.log
17
+ .idea/
18
+ .vscode/
19
+ !.vscode/extensions.json
20
+ klyrek_output/
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: klyrek-tech
3
+ Version: 0.1.0
4
+ Summary: Technology fingerprinting for the Klyrek ecosystem
5
+ Author: Klyrek Contributors
6
+ License: MIT
7
+ Keywords: appsec,fingerprinting,pentesting,reconnaissance,security
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Information Technology
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Security
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: httpx>=0.27
15
+ Requires-Dist: klyrek-core
16
+ Provides-Extra: dev
17
+ Requires-Dist: mypy>=1.10; extra == 'dev'
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Requires-Dist: ruff>=0.6; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # klyrek-tech
23
+
24
+ Technology fingerprinting. Matches an `httpx.Response` against a curated set of declarative
25
+ `Signature`s (header patterns, cookie names, response-body patterns, and `<meta name="generator">`
26
+ tags) and produces `klyrek_core.models.Technology` records — the same approach tools like
27
+ Wappalyzer and WhatWeb use, kept intentionally data-driven rather than one class per technology
28
+ so new signatures are a one-line addition, not a new module.
29
+
30
+ ```python
31
+ from klyrek_tech.fingerprint import fingerprint
32
+
33
+ response = client.get("https://target.com/")
34
+ for tech in fingerprint(response):
35
+ print(tech.category, tech.name, tech.version, tech.evidence)
36
+ ```
37
+
38
+ Signatures live in `klyrek_tech.signatures.SIGNATURES` — add an entry there to recognize a new
39
+ server, framework, CMS, or JS library rather than writing new matching code.
@@ -0,0 +1,18 @@
1
+ # klyrek-tech
2
+
3
+ Technology fingerprinting. Matches an `httpx.Response` against a curated set of declarative
4
+ `Signature`s (header patterns, cookie names, response-body patterns, and `<meta name="generator">`
5
+ tags) and produces `klyrek_core.models.Technology` records — the same approach tools like
6
+ Wappalyzer and WhatWeb use, kept intentionally data-driven rather than one class per technology
7
+ so new signatures are a one-line addition, not a new module.
8
+
9
+ ```python
10
+ from klyrek_tech.fingerprint import fingerprint
11
+
12
+ response = client.get("https://target.com/")
13
+ for tech in fingerprint(response):
14
+ print(tech.category, tech.name, tech.version, tech.evidence)
15
+ ```
16
+
17
+ Signatures live in `klyrek_tech.signatures.SIGNATURES` — add an entry there to recognize a new
18
+ server, framework, CMS, or JS library rather than writing new matching code.
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "klyrek-tech"
7
+ version = "0.1.0"
8
+ description = "Technology fingerprinting for the Klyrek ecosystem"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Klyrek Contributors" }]
13
+ keywords = ["security", "reconnaissance", "appsec", "pentesting", "fingerprinting"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Information Technology",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Security",
20
+ ]
21
+ dependencies = [
22
+ "klyrek-core",
23
+ "httpx>=0.27",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ dev = ["pytest>=8.0", "ruff>=0.6", "mypy>=1.10"]
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["src/klyrek_tech"]
@@ -0,0 +1,8 @@
1
+ """klyrek-tech: signature-based technology fingerprinting."""
2
+
3
+ from klyrek_tech.fingerprint import fingerprint
4
+ from klyrek_tech.signatures import SIGNATURES, Signature
5
+
6
# Version string exposed on the package itself.
__version__ = "0.1.0"

# Names that make up the package's public API (re-exported from the
# ``fingerprint`` and ``signatures`` submodules imported above).
__all__ = ["SIGNATURES", "Signature", "__version__", "fingerprint"]
@@ -0,0 +1,93 @@
1
+ """Match an HTTP response against known technology signatures."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from http.cookies import SimpleCookie
7
+
8
+ import httpx
9
+
10
+ from klyrek_core.models import Technology
11
+ from klyrek_tech.signatures import SIGNATURES
12
+
13
# Captures the ``content`` value of a ``<meta name="generator" ...>`` tag.
#
# The lookahead pins ``name="generator"`` anywhere inside the tag, so the tag is
# recognised whether ``content`` is written before or after ``name`` (both
# orders occur in real markup). ``\b`` after ``meta`` and the ``\s`` in front of
# each attribute keep ``<metadata ...>`` and ``data-name=`` / ``data-content=``
# from matching. Group 1 is always the content value.
_META_GENERATOR_RE = re.compile(
    r'<meta\b(?=[^>]*\sname=["\']generator["\'])[^>]*\scontent=["\']([^"\']+)["\']',
    re.IGNORECASE,
)
16
+
17
+
18
+ def _extract_version(match: re.Match[str] | None) -> str | None:
19
+ if match is None:
20
+ return None
21
+ try:
22
+ return match.group("version")
23
+ except IndexError:
24
+ return None
25
+
26
+
27
def _cookie_names(response: httpx.Response) -> list[str]:
    """Return the names of all cookies set by *response*.

    Each ``Set-Cookie`` header is parsed with ``SimpleCookie`` first. Responses
    come from arbitrary scanned targets, so a header that the stdlib parser
    rejects (it raises ``CookieError`` for names containing characters such as
    ``/`` or ``@``) or silently yields nothing for (e.g. unquoted JSON values)
    must not abort fingerprinting: in that case the name is taken as the text
    before the first ``=`` of the leading ``name=value`` pair.
    """
    from http.cookies import CookieError

    names: list[str] = []
    for raw in response.headers.get_list("set-cookie"):
        jar: SimpleCookie = SimpleCookie()
        try:
            jar.load(raw)
        except CookieError:
            # Discard anything stored before the parser gave up.
            jar.clear()

        if jar:
            names.extend(jar.keys())
            continue

        # Lenient fallback: only the cookie *name* matters for fingerprinting.
        name, sep, _ = raw.partition(";")[0].partition("=")
        name = name.strip()
        if sep and name:
            names.append(name)
    return names
34
+
35
+
36
def _looks_like_text(response: httpx.Response) -> bool:
    """Report whether the response's Content-Type suggests a scannable text body.

    Only the media type is inspected (parameters such as ``charset`` or
    ``boundary`` are ignored) and the comparison is case-insensitive, because
    media types are case-insensitive. A missing or empty Content-Type is
    treated as text. Besides ``text/*`` and JSON, XML-based types are accepted
    so XHTML pages (``application/xhtml+xml``) are scanned as well.
    """
    header_value = response.headers.get("content-type", "")
    media_type = header_value.partition(";")[0].strip().lower()
    if not media_type:
        return True
    return any(marker in media_type for marker in ("text", "json", "xml"))
39
+
40
+
41
def fingerprint(response: httpx.Response) -> list[Technology]:
    """Match a response against every known signature and return the technologies found.

    Each signature yields at most one ``Technology``; it is reported as soon as
    any of its header, cookie, meta-generator or body patterns produced
    evidence. The version is the first non-empty ``version`` group captured,
    with patterns consulted in that same order.
    """
    # Decode the payload only when it is non-empty and declared as text.
    scannable = _looks_like_text(response) and response.content
    body = response.text if scannable else ""
    generator_hit = _META_GENERATOR_RE.search(body) if body else None
    generator_content = generator_hit.group(1) if generator_hit else None
    set_cookie_names = _cookie_names(response)

    found: list[Technology] = []

    for sig in SIGNATURES:
        proof: list[str] = []
        version: str | None = None

        # 1. Response headers.
        for header, header_re in sig.headers.items():
            header_value = response.headers.get(header)
            hit = None if header_value is None else header_re.search(header_value)
            if hit:
                proof.append(f"{header}: {header_value}")
                if not version:
                    version = _extract_version(hit)

        # 2. Names of cookies set by the response.
        proof.extend(
            f"cookie: {cookie}"
            for cookie_re in sig.cookies
            for cookie in set_cookie_names
            if cookie_re.search(cookie)
        )

        # 3. Content of the <meta name="generator"> tag, when the page has one.
        if sig.meta_generator and generator_content is not None:
            hit = sig.meta_generator.search(generator_content)
            if hit:
                proof.append(f"meta generator: {generator_content}")
                if not version:
                    version = _extract_version(hit)

        # 4. Free-form body patterns (skipped entirely for empty/binary bodies).
        if body:
            for body_re in sig.body:
                hit = body_re.search(body)
                if hit:
                    proof.append(f"body pattern: {body_re.pattern}")
                    if not version:
                        version = _extract_version(hit)

        if not proof:
            continue

        found.append(
            Technology(
                name=sig.name,
                category=sig.category,
                version=version,
                evidence=proof,
                discovered_by="klyrek-tech",
            )
        )

    return found
File without changes
@@ -0,0 +1,111 @@
1
+ """Declarative technology signatures.
2
+
3
+ Kept data-driven (a list of ``Signature`` records) rather than one class per
4
+ technology, since the matching logic (header/cookie/body regex) is identical
5
+ across every signature — only the patterns differ. Adding a new technology is a
6
+ new list entry, not new code.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+ from dataclasses import dataclass, field
13
+
14
+
15
@dataclass(slots=True)
class Signature:
    """Declarative description of how to recognise one technology.

    ``name`` and ``category`` identify the technology; the remaining fields
    hold compiled patterns and all default to "no patterns of this kind".
    """

    # Display name of the technology, e.g. "nginx".
    name: str
    # Free-form grouping label, e.g. "web-server" or "cms".
    category: str
    # Header name -> pattern applied to that header's value.
    headers: dict[str, re.Pattern[str]] = field(default_factory=dict)
    # Patterns applied to cookie names.
    cookies: list[re.Pattern[str]] = field(default_factory=list)
    # Patterns applied to the response body text.
    body: list[re.Pattern[str]] = field(default_factory=list)
    # Pattern applied to the content of a <meta name="generator"> tag, if any.
    meta_generator: re.Pattern[str] | None = None
23
+
24
+
25
def _re(pattern: str) -> re.Pattern[str]:
    """Compile *pattern* as a case-insensitive regular expression."""
    flags = re.IGNORECASE
    return re.compile(pattern, flags)
27
+
28
+
29
#: Catalogue of known technology signatures.
#:
#: A pattern with a named ``(?P<version>...)`` group lets fingerprint() report a version;
#: patterns without one still match, just without a version string attached.
SIGNATURES: list[Signature] = [
    # --- Web servers: matched on the ``Server`` header ---
    Signature(
        "nginx", "web-server", headers={"server": _re(r"nginx(?:/(?P<version>[\d.]+))?")}
    ),
    Signature(
        "Apache", "web-server", headers={"server": _re(r"apache(?:/(?P<version>[\d.]+))?")}
    ),
    Signature(
        "Microsoft IIS",
        "web-server",
        headers={"server": _re(r"microsoft-iis(?:/(?P<version>[\d.]+))?")},
    ),
    # --- Languages / frameworks advertised through response headers ---
    Signature("PHP", "language", headers={"x-powered-by": _re(r"php(?:/(?P<version>[\d.]+))?")}),
    Signature(
        "ASP.NET",
        "framework",
        headers={
            "x-powered-by": _re(r"asp\.net"),
            # Any numeric value in this header is taken as the version.
            "x-aspnet-version": _re(r"(?P<version>[\d.]+)"),
        },
    ),
    Signature("Express", "framework", headers={"x-powered-by": _re(r"express")}),
    # --- CDN / hosting platforms ---
    # ``.+`` means the mere presence of a non-empty header counts as a match.
    Signature(
        "Cloudflare", "cdn", headers={"server": _re(r"cloudflare"), "cf-ray": _re(r".+")}
    ),
    Signature(
        "Vercel", "hosting", headers={"server": _re(r"vercel"), "x-vercel-id": _re(r".+")}
    ),
    Signature("Netlify", "hosting", headers={"server": _re(r"netlify")}),
    # --- CMSes: body markers plus the <meta name="generator"> content ---
    Signature(
        "WordPress",
        "cms",
        body=[_re(r"wp-content"), _re(r"wp-includes")],
        meta_generator=_re(r"wordpress(?:\s+(?P<version>[\d.]+))?"),
    ),
    Signature(
        "Drupal",
        "cms",
        body=[_re(r"drupal\.settings"), _re(r"/sites/default/files")],
        meta_generator=_re(r"drupal(?:\s+(?P<version>[\d.]+))?"),
    ),
    Signature(
        "Joomla",
        "cms",
        body=[_re(r"/media/jui/")],
        meta_generator=_re(r"joomla!?\s*-?\s*(?P<version>[\d.]+)?"),
    ),
    # --- Server-side frameworks: cookie names (anchored to the full name) ---
    Signature("Laravel", "framework", cookies=[_re(r"^laravel_session$")]),
    Signature(
        "Django",
        "framework",
        body=[_re(r"csrfmiddlewaretoken")],
        cookies=[_re(r"^csrftoken$")],
    ),
    Signature(
        "Ruby on Rails",
        "framework",
        cookies=[_re(r"^_\w+_session$")],
        headers={"x-powered-by": _re(r"phusion passenger")},
    ),
    # --- Front-end frameworks and libraries: body markers ---
    Signature(
        "Next.js", "framework", body=[_re(r"__next_data__"), _re(r"/_next/static/")]
    ),
    Signature("React", "frontend-library", body=[_re(r"data-reactroot"), _re(r"react-dom")]),
    Signature("Vue.js", "frontend-library", body=[_re(r"data-v-[0-9a-f]{6,}")]),
    Signature(
        "Angular",
        "frontend-framework",
        body=[_re(r'ng-version="(?P<version>[\d.]+)"')],
    ),
    # --- Asset file names; the optional version sits between the name and the extension ---
    Signature(
        "jQuery",
        "js-library",
        body=[_re(r"jquery(?:[.-](?P<version>[\d.]+))?\.(?:min\.)?js")],
    ),
    Signature(
        "Bootstrap",
        "css-framework",
        body=[_re(r"bootstrap(?:[.-](?P<version>[\d.]+))?\.(?:min\.)?css")],
    ),
]
@@ -0,0 +1,109 @@
1
+ import httpx
2
+
3
+ from klyrek_tech.fingerprint import fingerprint
4
+
5
+
6
def _response(
    headers: dict[str, str] | list[tuple[str, str]] | None = None,
    body: str = "",
    content_type: str = "text/html",
) -> httpx.Response:
    """Build a canned 200 response for ``https://target.com/``.

    ``headers`` may be a mapping or a list of pairs (the list form allows
    repeated header names); a ``content-type`` header carrying *content_type*
    is always added, and *body* is sent encoded as bytes.
    """
    merged: dict[str, str] | list[tuple[str, str]]
    if headers and not isinstance(headers, dict):
        merged = list(headers)
        merged.append(("content-type", content_type))
    else:
        merged = dict(headers or {})
        merged["content-type"] = content_type

    request = httpx.Request("GET", "https://target.com/")
    return httpx.Response(200, headers=merged, content=body.encode(), request=request)
19
+
20
+
21
def _names(techs: list) -> set[str]:
    """Collect the distinct ``name`` attributes of the given technologies."""
    names: set[str] = set()
    for tech in techs:
        names.add(tech.name)
    return names
23
+
24
+
25
def test_nginx_server_header_with_version():
    """A ``Server: nginx/<version>`` header yields nginx with category and version."""
    detected = fingerprint(_response(headers={"Server": "nginx/1.18.0"}))
    server = next(tech for tech in detected if tech.name == "nginx")
    assert server.category == "web-server"
    assert server.version == "1.18.0"
31
+
32
+
33
def test_php_via_x_powered_by():
    """PHP and its version are read from the ``X-Powered-By`` header."""
    detected = fingerprint(_response(headers={"X-Powered-By": "PHP/8.1.2"}))
    language = next(tech for tech in detected if tech.name == "PHP")
    assert language.version == "8.1.2"
38
+
39
+
40
def test_wordpress_via_body_markers():
    """A ``wp-content`` asset path in the body is enough to flag WordPress."""
    html = '<link rel="stylesheet" href="/wp-content/themes/x/style.css">'
    detected = _names(fingerprint(_response(body=html)))
    assert "WordPress" in detected
43
+
44
+
45
def test_wordpress_via_meta_generator_with_version():
    """The generator meta tag supplies the WordPress version."""
    html = '<meta name="generator" content="WordPress 6.4.2">'
    detected = fingerprint(_response(body=html))
    cms = next(tech for tech in detected if tech.name == "WordPress")
    assert cms.version == "6.4.2"
52
+
53
+
54
def test_django_via_csrf_marker_and_cookie():
    """Django is flagged for a response with its CSRF cookie and form field."""
    cookie_headers = [("Set-Cookie", "csrftoken=abc123; Path=/")]
    form = '<input type="hidden" name="csrfmiddlewaretoken" value="xyz">'
    detected = _names(fingerprint(_response(headers=cookie_headers, body=form)))
    assert "Django" in detected
60
+
61
+
62
def test_laravel_via_session_cookie():
    """The ``laravel_session`` cookie name alone identifies Laravel."""
    cookie_headers = [("Set-Cookie", "laravel_session=abc123; Path=/; HttpOnly")]
    detected = _names(fingerprint(_response(headers=cookie_headers)))
    assert "Laravel" in detected
65
+
66
+
67
def test_cloudflare_via_headers():
    """Cloudflare is flagged from its ``Server`` and ``CF-RAY`` headers."""
    edge_headers = {"Server": "cloudflare", "CF-RAY": "abc123-DFW"}
    detected = _names(fingerprint(_response(headers=edge_headers)))
    assert "Cloudflare" in detected
70
+
71
+
72
def test_react_via_body_marker():
    """A ``data-reactroot`` attribute in the markup flags React."""
    html = '<div id="root" data-reactroot=""></div>'
    detected = _names(fingerprint(_response(body=html)))
    assert "React" in detected
75
+
76
+
77
def test_angular_version_extracted():
    """The ``ng-version`` attribute supplies the Angular version."""
    html = '<html ng-version="17.0.1"><body></body></html>'
    detected = fingerprint(_response(body=html))
    framework = next(tech for tech in detected if tech.name == "Angular")
    assert framework.version == "17.0.1"
82
+
83
+
84
def test_no_signatures_match_generic_response():
    """A plain page without any known marker produces no technologies."""
    plain_page = _response(headers={}, body="<html><body>hello</body></html>")
    detected = fingerprint(plain_page)
    assert detected == []
87
+
88
+
89
def test_binary_response_is_not_scanned_as_body():
    """Body patterns are not applied when the content type is not textual."""
    image = _response(
        headers={},
        body="wp-content should not match",
        content_type="image/png",
    )
    detected = fingerprint(image)
    assert detected == []
94
+
95
+
96
def test_evidence_is_populated():
    """The matched header value is recorded in the technology's evidence."""
    detected = fingerprint(_response(headers={"Server": "nginx/1.24.0"}))
    server = next(tech for tech in detected if tech.name == "nginx")
    assert any("nginx/1.24.0" in entry for entry in server.evidence)
101
+
102
+
103
def test_multiple_technologies_detected_independently():
    """Header- and body-based signatures are all reported for one response."""
    stack_headers = {"Server": "nginx/1.18.0", "X-Powered-By": "PHP/8.1.2"}
    html = '<meta name="generator" content="WordPress 6.4">'
    detected = _names(fingerprint(_response(headers=stack_headers, body=html)))
    assert {"nginx", "PHP", "WordPress"} <= detected