klyrek-tech 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """klyrek-tech: signature-based technology fingerprinting."""
2
+
3
+ from klyrek_tech.fingerprint import fingerprint
4
+ from klyrek_tech.signatures import SIGNATURES, Signature
5
+
6
+ __version__ = "0.1.0"
7
+
8
+ __all__ = ["SIGNATURES", "Signature", "__version__", "fingerprint"]
@@ -0,0 +1,93 @@
1
+ """Match an HTTP response against known technology signatures."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from http.cookies import SimpleCookie
7
+
8
+ import httpx
9
+
10
+ from klyrek_core.models import Technology
11
+ from klyrek_tech.signatures import SIGNATURES
12
+
13
# Matches a ``<meta>`` tag whose ``name`` attribute is "generator" and captures
# the value of its ``content`` attribute as group 1.
#
# The lookahead checks for ``name="generator"`` anywhere inside the tag, so the
# tag is recognised regardless of attribute order: both
# ``<meta name="generator" content="X">`` and
# ``<meta content="X" name="generator">`` yield group(1) == "X".
# ``[^>]*`` keeps every part of the match inside a single tag.
_META_GENERATOR_RE = re.compile(
    r'<meta\b(?=[^>]*\sname=["\']generator["\'])'
    r'[^>]*\scontent=["\']([^"\']+)["\']',
    re.IGNORECASE,
)
16
+
17
+
18
def _extract_version(match: re.Match[str] | None) -> str | None:
    """Return the text captured by the ``version`` group of *match*, if any.

    ``None`` is returned when there is no match, when the pattern that
    produced the match defines no group named ``version``, or when that group
    exists but did not take part in the match.
    """
    # Only patterns that declare a named "version" group can report one.
    if match is not None and "version" in match.re.groupindex:
        return match.group("version")
    return None
25
+
26
+
27
def _cookie_names(response: httpx.Response) -> list[str]:
    """Return the cookie names set by *response*, one header at a time.

    Every ``Set-Cookie`` header is parsed with ``SimpleCookie``. That parser is
    strict: it raises ``CookieError`` for some names real servers emit (for
    example names containing ``/``) and silently yields nothing for others
    (for example names containing ``[``). Headers come from the scanned server
    and are untrusted, so a header the strict parser cannot handle must not
    abort fingerprinting. In that case the name is taken leniently as the text
    before the first ``=`` of the header's first ``;``-separated segment.

    Args:
        response: The HTTP response whose ``Set-Cookie`` headers are inspected.

    Returns:
        Cookie names in header order; headers without a ``name=value`` pair
        contribute nothing.
    """
    # Imported here so the block does not depend on a change to the
    # module-level import of ``SimpleCookie``.
    from http.cookies import CookieError

    names: list[str] = []
    for raw in response.headers.get_list("set-cookie"):
        jar: SimpleCookie = SimpleCookie()
        try:
            jar.load(raw)
        except CookieError:
            # Rejected by the strict parser; handled by the lenient path below
            # unless some cookies were already loaded before the failure.
            pass

        if jar:
            names.extend(jar.keys())
            continue

        # Lenient fallback: "<name>=<value>; attr=...; flag"
        name, separator, _ = raw.split(";", 1)[0].partition("=")
        name = name.strip()
        if separator and name:
            names.append(name)
    return names
34
+
35
+
36
def _looks_like_text(response: httpx.Response) -> bool:
    """Return whether the response body is worth decoding and pattern-matching.

    The decision is based on the ``Content-Type`` header only: a missing or
    empty header is treated as text, otherwise the value must mention
    ``text``, ``json`` or ``xml`` (the latter covers XHTML pages served as
    ``application/xhtml+xml``).

    Args:
        response: The HTTP response whose ``Content-Type`` header is checked.

    Returns:
        ``True`` when the body should be treated as text, ``False`` otherwise.
    """
    # Media types are case-insensitive ("Text/HTML" is valid), so normalise
    # the header value before the substring checks.
    content_type = response.headers.get("content-type", "").lower()
    if not content_type:
        return True
    return any(marker in content_type for marker in ("text", "json", "xml"))
39
+
40
+
41
def fingerprint(response: httpx.Response) -> list[Technology]:
    """Return a ``Technology`` for every entry of ``SIGNATURES`` that *response* matches.

    Each signature is checked against the response headers, the cookie names
    returned by ``_cookie_names``, the first tag found by
    ``_META_GENERATOR_RE`` and the decoded body. A signature is reported when
    at least one of its patterns matches. Its evidence lists what matched in
    that order, and its version is the first non-empty value
    ``_extract_version`` yields from a header, meta-generator or body match
    (``None`` when no pattern captured one).
    """
    # The body is only decoded when it is non-empty and looks textual.
    if _looks_like_text(response) and response.content:
        body = response.text
    else:
        body = ""
    generator_hit = _META_GENERATOR_RE.search(body) if body else None
    set_cookie_names = _cookie_names(response)

    results: list[Technology] = []

    for signature in SIGNATURES:
        proof: list[str] = []
        detected_version: str | None = None

        # Response headers.
        for header, header_re in signature.headers.items():
            header_value = response.headers.get(header)
            hit = None if header_value is None else header_re.search(header_value)
            if not hit:
                continue
            proof.append(f"{header}: {header_value}")
            if not detected_version:
                detected_version = _extract_version(hit)

        # Cookie names; these never carry a version.
        proof.extend(
            f"cookie: {cookie}"
            for cookie_re in signature.cookies
            for cookie in set_cookie_names
            if cookie_re.search(cookie)
        )

        # <meta name="generator"> content.
        if signature.meta_generator and generator_hit:
            generator = generator_hit.group(1)
            hit = signature.meta_generator.search(generator)
            if hit:
                proof.append(f"meta generator: {generator}")
                if not detected_version:
                    detected_version = _extract_version(hit)

        # Body patterns are skipped entirely when there is no decoded body.
        if body:
            for body_re in signature.body:
                hit = body_re.search(body)
                if hit:
                    proof.append(f"body pattern: {body_re.pattern}")
                    if not detected_version:
                        detected_version = _extract_version(hit)

        if not proof:
            continue
        results.append(
            Technology(
                name=signature.name,
                category=signature.category,
                version=detected_version,
                evidence=proof,
                discovered_by="klyrek-tech",
            )
        )

    return results
klyrek_tech/py.typed ADDED
File without changes
@@ -0,0 +1,111 @@
1
+ """Declarative technology signatures.
2
+
3
+ Kept data-driven (a list of ``Signature`` records) rather than one class per
4
+ technology, since the matching logic (header/cookie/body regex) is identical
5
+ across every signature — only the patterns differ. Adding a new technology is a
6
+ new list entry, not new code.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+ from dataclasses import dataclass, field
13
+
14
+
15
+ @dataclass(slots=True)
16
+ class Signature:
17
+ name: str
18
+ category: str
19
+ headers: dict[str, re.Pattern[str]] = field(default_factory=dict)
20
+ cookies: list[re.Pattern[str]] = field(default_factory=list)
21
+ body: list[re.Pattern[str]] = field(default_factory=list)
22
+ meta_generator: re.Pattern[str] | None = None
23
+
24
+
25
def _re(pattern: str) -> re.Pattern[str]:
    """Compile *pattern* as a case-insensitive regular expression."""
    return re.compile(pattern, flags=re.IGNORECASE)
27
+
28
+
29
#: A pattern with a named ``(?P<version>...)`` group lets fingerprint() report a version;
#: patterns without one still match, just without a version string attached.
#: All patterns are compiled through ``_re`` and are therefore case-insensitive.
SIGNATURES: list[Signature] = [
    Signature(
        "nginx", "web-server", headers={"server": _re(r"nginx(?:/(?P<version>[\d.]+))?")}
    ),
    Signature(
        "Apache",
        "web-server",
        # "Apache-Coyote/1.1" is the Server header of Tomcat's HTTP connector,
        # not Apache httpd, so it is excluded from this signature.
        headers={"server": _re(r"apache(?!-coyote)(?:/(?P<version>[\d.]+))?")},
    ),
    Signature(
        "Microsoft IIS",
        "web-server",
        headers={"server": _re(r"microsoft-iis(?:/(?P<version>[\d.]+))?")},
    ),
    Signature("PHP", "language", headers={"x-powered-by": _re(r"php(?:/(?P<version>[\d.]+))?")}),
    Signature(
        "ASP.NET",
        "framework",
        headers={
            "x-powered-by": _re(r"asp\.net"),
            "x-aspnet-version": _re(r"(?P<version>[\d.]+)"),
        },
    ),
    Signature("Express", "framework", headers={"x-powered-by": _re(r"express")}),
    Signature(
        "Cloudflare", "cdn", headers={"server": _re(r"cloudflare"), "cf-ray": _re(r".+")}
    ),
    Signature(
        "Vercel", "hosting", headers={"server": _re(r"vercel"), "x-vercel-id": _re(r".+")}
    ),
    Signature("Netlify", "hosting", headers={"server": _re(r"netlify")}),
    Signature(
        "WordPress",
        "cms",
        body=[_re(r"wp-content"), _re(r"wp-includes")],
        meta_generator=_re(r"wordpress(?:\s+(?P<version>[\d.]+))?"),
    ),
    Signature(
        "Drupal",
        "cms",
        body=[_re(r"drupal\.settings"), _re(r"/sites/default/files")],
        meta_generator=_re(r"drupal(?:\s+(?P<version>[\d.]+))?"),
    ),
    Signature(
        "Joomla",
        "cms",
        body=[_re(r"/media/jui/")],
        meta_generator=_re(r"joomla!?\s*-?\s*(?P<version>[\d.]+)?"),
    ),
    Signature("Laravel", "framework", cookies=[_re(r"^laravel_session$")]),
    Signature(
        "Django",
        "framework",
        body=[_re(r"csrfmiddlewaretoken")],
        cookies=[_re(r"^csrftoken$")],
    ),
    Signature(
        "Ruby on Rails",
        "framework",
        cookies=[_re(r"^_\w+_session$")],
        headers={"x-powered-by": _re(r"phusion passenger")},
    ),
    Signature(
        "Next.js", "framework", body=[_re(r"__next_data__"), _re(r"/_next/static/")]
    ),
    Signature("React", "frontend-library", body=[_re(r"data-reactroot"), _re(r"react-dom")]),
    Signature("Vue.js", "frontend-library", body=[_re(r"data-v-[0-9a-f]{6,}")]),
    Signature(
        "Angular",
        "frontend-framework",
        body=[_re(r'ng-version="(?P<version>[\d.]+)"')],
    ),
    Signature(
        "jQuery",
        "js-library",
        body=[_re(r"jquery(?:[.-](?P<version>[\d.]+))?\.(?:min\.)?js")],
    ),
    Signature(
        "Bootstrap",
        "css-framework",
        body=[_re(r"bootstrap(?:[.-](?P<version>[\d.]+))?\.(?:min\.)?css")],
    ),
]
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: klyrek-tech
3
+ Version: 0.1.0
4
+ Summary: Technology fingerprinting for the Klyrek ecosystem
5
+ Author: Klyrek Contributors
6
+ License: MIT
7
+ Keywords: appsec,fingerprinting,pentesting,reconnaissance,security
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Information Technology
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Security
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: httpx>=0.27
15
+ Requires-Dist: klyrek-core
16
+ Provides-Extra: dev
17
+ Requires-Dist: mypy>=1.10; extra == 'dev'
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Requires-Dist: ruff>=0.6; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # klyrek-tech
23
+
24
+ Technology fingerprinting. Matches an `httpx.Response` against a curated set of declarative
25
+ `Signature`s (header patterns, cookie names, response-body patterns, and `<meta name="generator">`
26
+ tags) and produces `klyrek_core.models.Technology` records — the same approach tools like
27
+ Wappalyzer and WhatWeb use, kept intentionally data-driven rather than one class per technology
28
+ so new signatures are a one-line addition, not a new module.
29
+
30
+ ```python
31
+ from klyrek_tech.fingerprint import fingerprint
32
+
33
+ response = client.get("https://example.com/")  # client: an httpx.Client; only scan hosts you are authorized to test
34
+ for tech in fingerprint(response):
35
+ print(tech.category, tech.name, tech.version, tech.evidence)
36
+ ```
37
+
38
+ Signatures live in `klyrek_tech.signatures.SIGNATURES` — add an entry there to recognize a new
39
+ server, framework, CMS, or JS library rather than writing new matching code.
@@ -0,0 +1,7 @@
1
+ klyrek_tech/__init__.py,sha256=RmS_dX5nuyMHL0QRstcXKwlStGMxDsDS6HQfXlc1Ijw,260
2
+ klyrek_tech/fingerprint.py,sha256=7YrJDPQF07q4V98oUbAwmAiLm_iRM2u4TdgmQjErLzk,3012
3
+ klyrek_tech/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ klyrek_tech/signatures.py,sha256=P4YiXeCVddoaU96dyYbOk6HY1hX1SvedzBDm5g8sf4M,3675
5
+ klyrek_tech-0.1.0.dist-info/METADATA,sha256=Ig3MzTkxvAugFKwAqNz8LuPjXJAze-AP2i_tTrsoU1c,1556
6
+ klyrek_tech-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
7
+ klyrek_tech-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any