shadow-sniffer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.10", "3.11", "3.12"]
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: ${{ matrix.python-version }}
19
+ - run: pip install -e ".[dev]"
20
+ - run: pytest
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ venv/
5
+ *.egg-info/
6
+ build/
7
+ dist/
8
+ .pytest_cache/
9
+ .DS_Store
@@ -0,0 +1,26 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software in your research, please cite it as below."
3
+ title: "shadow-sniffer: Shadow AI detector for network connection logs"
4
+ abstract: >
5
+ A CLI tool that scans a local network connection log (JSON or CSV) against
6
+ a curated catalog of known third-party AI service domains, cross-references
7
+ matches against an approved-services allowlist, and reports unsanctioned
8
+ AI usage (Shadow AI) tagged with MITRE ATT&CK T1567 (Exfiltration Over Web
9
+ Service).
10
+ authors:
11
+ - family-names: "Surendran"
12
+ given-names: "Prasanna Kumar"
13
+ repository-code: "https://github.com/Prasanna-27eng/shadow-sniffer"
14
+ url: "https://github.com/Prasanna-27eng/shadow-sniffer"
15
+ license: MIT
16
+ version: 0.1.0
17
+ date-released: "2026-06-11"
18
+ keywords:
19
+ - "shadow ai"
20
+ - "ai security"
21
+ - "data exfiltration"
22
+ - "network security"
23
+ - "saas security"
24
+ - "purple team"
25
+ - "blue team"
26
+ - "cspm"
@@ -0,0 +1,47 @@
1
+ # Contributing to shadow-sniffer
2
+
3
+ Contributions are welcome — especially new catalog entries and connection
4
+ log format support.
5
+
6
+ ## Adding an AI service to the catalog
7
+
8
+ `src/shadow_sniffer/catalog.py` holds `AI_SERVICE_CATALOG`, a list of
9
+ `AIService(name, domain, category)` entries. Matching is suffix-based, so
10
+ add the most specific hostname that's actually contacted (e.g.
11
+ `api.openai.com`, not `openai.com`, if the consumer chat app uses a
12
+ different domain).
13
+
14
+ When adding a service:
15
+
16
+ - Use one of the existing categories where it fits (`LLM Chat`, `LLM API`,
17
+ `Code Assistant`, `Image Generation`, `Video Generation`, `Voice & Audio`,
18
+ `Search & Research`, `Agent & Automation`), or propose a new one if none
19
+ fit.
20
+ - Add a test in `tests/test_catalog.py` confirming `match_domain()` resolves
21
+ the new hostname (and any subdomain you expect to see in real traffic).
22
+
23
+ ## Adding a connection log format
24
+
25
+ `src/shadow_sniffer/parsers.py` currently supports JSON and CSV via
26
+ `parse_log()`. To add a new format (e.g. Zeek `conn.log`, a specific proxy
27
+ export):
28
+
29
+ - Add a `parse_<format>_log(path) -> list[ConnectionRecord]` function that
30
+ maps the format's fields onto `ConnectionRecord` (use `_FIELD_ALIASES` /
31
+ `_to_record` if the format is dict-like).
32
+ - Wire it into `parse_log()`'s extension dispatch.
33
+ - Add a fixture under `tests/fixtures/` and cover it in `tests/test_parsers.py`.
34
+
35
+ ## Running tests
36
+
37
+ ```bash
38
+ pip install -e ".[dev]"
39
+ pytest
40
+ ```
41
+
42
+ ## Reporting a vulnerability in shadow-sniffer itself
43
+
44
+ shadow-sniffer only reads local connection-log files and never makes
45
+ network connections of its own (other than the optional AegisTrace
46
+ reporting POST). If you find a bug that could cause it to be misused beyond
47
+ this documented scope, please open an issue describing the problem.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Prasanna Kumar Surendran
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: shadow-sniffer
3
+ Version: 0.1.0
4
+ Summary: Shadow AI detector — scans network connection logs for unsanctioned AI API/service usage and cross-references against an approved-services allowlist.
5
+ Project-URL: Homepage, https://github.com/Prasanna-27eng/shadow-sniffer
6
+ Project-URL: Repository, https://github.com/Prasanna-27eng/shadow-sniffer
7
+ Author: Prasanna Kumar Surendran
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai-security,blue-team,cspm,data-exfiltration,network-security,purple-team,saas-security,shadow-ai
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Information Technology
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Security
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: httpx>=0.27
21
+ Requires-Dist: rich>=13.0
22
+ Requires-Dist: typer>=0.12
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # shadow-sniffer
28
+
29
+ [![PyPI](https://img.shields.io/pypi/v/shadow-sniffer.svg)](https://pypi.org/project/shadow-sniffer/)
30
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
31
+
32
+ > Shadow AI detector — scans network connection logs for unsanctioned AI
33
+ > service usage and cross-references against an approved-services allowlist.
34
+
35
+ **Educational / authorized-use tool.** shadow-sniffer reads a local export
36
+ of outbound network connections — no live agent, packet capture, or network
37
+ access required. Use it on your own logs, or on logs you have permission to
38
+ analyze.
39
+
40
+ ## Why shadow-sniffer?
41
+
42
+ Employees and AI agents routinely send data — source code, customer records,
43
+ credentials, internal documents — to third-party AI chat apps and APIs that
44
+ IT and security never approved. This is "Shadow AI": functionally the same
45
+ risk as Shadow IT, but the exfiltration channel is a chat box or an API call
46
+ that looks like normal HTTPS traffic.
47
+
48
+ shadow-sniffer is the fourth tool in the **Grassroots Expansion Pack**
49
+ (`mcp-sploit` → `prompt-fuzz` → `nhi-hunter` → `shadow-sniffer`). Where the
50
+ others attack an AI deployment's *tools*, *brain*, and *identity layer*,
51
+ shadow-sniffer looks at the *data layer* — where data is actually going.
52
+
53
+ It ships with a curated catalog of 39 known AI service domains across 6
54
+ categories (LLM Chat, LLM API, Code Assistant, Image Generation, Video
55
+ Generation, and Voice & Audio), matches them against a connection log, and flags any
56
+ hit that isn't on your organization's approved list. Findings are tagged
57
+ **MITRE ATT&CK T1567 (Exfiltration Over Web Service)**.
58
+
59
+ ## Quick start
60
+
61
+ ```bash
62
+ pip install shadow-sniffer
63
+
64
+ shadow-sniffer scan --input connections.json --approved approved_services.json
65
+ ```
66
+
67
+ ## Try it without any setup
68
+
69
+ ```bash
70
+ git clone https://github.com/Prasanna-27eng/shadow-sniffer
71
+ cd shadow-sniffer
72
+ pip install -e .
73
+
74
+ shadow-sniffer scan \
75
+ --input examples/sample_connections.json \
76
+ --approved examples/approved_services.json
77
+ ```
78
+
79
+ ## Connection log format
80
+
81
+ A JSON file with a `connections` list (or a bare list), or a CSV with a
82
+ header row. Recognized fields (aliases in parentheses):
83
+
84
+ | Field | Aliases |
85
+ |---|---|
86
+ | `timestamp` | `time`, `ts` |
87
+ | `src_host` | `source_host`, `hostname` |
88
+ | `src_ip` | `source_ip` |
89
+ | `user` | `username` |
90
+ | `process_name` | `process`, `proc_name` |
91
+ | `dest_host` | `dest_domain`, `destination_domain`, `remote_hostname` |
92
+ | `dest_ip` | `destination_ip`, `remote_ip` |
93
+ | `dest_port` | `destination_port`, `remote_port` |
94
+ | `bytes_sent` | `bytes_out`, `tx_bytes` |
95
+
96
+ Only `dest_host`/its aliases are matched against the AI service catalog —
97
+ shadow-sniffer does not perform DNS or reverse-DNS lookups.
98
+
99
+ ## Approved-services allowlist
100
+
101
+ A JSON file with an `approved_domains` list, or a plain-text file with one
102
+ domain per line (`#` comments allowed). Domains are matched by suffix, so
103
+ `openai.com` in your allowlist covers both `api.openai.com` and
104
+ `chat.openai.com`.
105
+
106
+ ## Console commands
107
+
108
+ - `shadow-sniffer scan --input <log> [--approved <allowlist>] [--output results.json]` —
109
+ scan a connection log, print findings as a table, exit non-zero if any
110
+ unapproved AI service usage is found (usable as a CI gate).
111
+ - `shadow-sniffer list-services` — print the full built-in AI service catalog.
112
+
113
+ ## AegisTrace integration
114
+
115
+ ```bash
116
+ shadow-sniffer scan --input connections.json \
117
+ --aegistrace-url https://your-aegistrace-instance \
118
+ --aegistrace-key $AEGISTRACE_INGEST_KEY
119
+ ```
120
+
121
+ Each finding is POSTed to `/api/ingest/shadowsniffer-event`, creating an
122
+ `AgentAction` entry visible in AegisTrace's `/app/agent-security` dashboard.
123
+
124
+ ## Testing
125
+
126
+ ```bash
127
+ pip install -e ".[dev]"
128
+ pytest
129
+ ```
130
+
131
+ ## Companion projects
132
+
133
+ - [mcp-sploit](https://github.com/Prasanna-27eng/mcp-sploit) — dynamic
134
+ security testing for MCP servers (attacks the *tools* layer)
135
+ - [prompt-fuzz](https://github.com/Prasanna-27eng/prompt-fuzz) — jailbreak
136
+ and prompt-injection fuzzer for LLM endpoints (attacks the *brain* layer)
137
+ - [nhi-hunter](https://github.com/Prasanna-27eng/nhi-hunter) — AWS IAM
138
+ privilege-escalation graph builder (attacks the *identity* layer)
139
+
140
+ ## License
141
+
142
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,116 @@
1
+ # shadow-sniffer
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/shadow-sniffer.svg)](https://pypi.org/project/shadow-sniffer/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
5
+
6
+ > Shadow AI detector — scans network connection logs for unsanctioned AI
7
+ > service usage and cross-references against an approved-services allowlist.
8
+
9
+ **Educational / authorized-use tool.** shadow-sniffer reads a local export
10
+ of outbound network connections — no live agent, packet capture, or network
11
+ access required. Use it on your own logs, or on logs you have permission to
12
+ analyze.
13
+
14
+ ## Why shadow-sniffer?
15
+
16
+ Employees and AI agents routinely send data — source code, customer records,
17
+ credentials, internal documents — to third-party AI chat apps and APIs that
18
+ IT and security never approved. This is "Shadow AI": functionally the same
19
+ risk as Shadow IT, but the exfiltration channel is a chat box or an API call
20
+ that looks like normal HTTPS traffic.
21
+
22
+ shadow-sniffer is the fourth tool in the **Grassroots Expansion Pack**
23
+ (`mcp-sploit` → `prompt-fuzz` → `nhi-hunter` → `shadow-sniffer`). Where the
24
+ others attack an AI deployment's *tools*, *brain*, and *identity layer*,
25
+ shadow-sniffer looks at the *data layer* — where data is actually going.
26
+
27
+ It ships with a curated catalog of 39 known AI service domains across 6
28
+ categories (LLM Chat, LLM API, Code Assistant, Image Generation, Video
29
+ Generation, and Voice & Audio), matches them against a connection log, and flags any
30
+ hit that isn't on your organization's approved list. Findings are tagged
31
+ **MITRE ATT&CK T1567 (Exfiltration Over Web Service)**.
32
+
33
+ ## Quick start
34
+
35
+ ```bash
36
+ pip install shadow-sniffer
37
+
38
+ shadow-sniffer scan --input connections.json --approved approved_services.json
39
+ ```
40
+
41
+ ## Try it without any setup
42
+
43
+ ```bash
44
+ git clone https://github.com/Prasanna-27eng/shadow-sniffer
45
+ cd shadow-sniffer
46
+ pip install -e .
47
+
48
+ shadow-sniffer scan \
49
+ --input examples/sample_connections.json \
50
+ --approved examples/approved_services.json
51
+ ```
52
+
53
+ ## Connection log format
54
+
55
+ A JSON file with a `connections` list (or a bare list), or a CSV with a
56
+ header row. Recognized fields (aliases in parentheses):
57
+
58
+ | Field | Aliases |
59
+ |---|---|
60
+ | `timestamp` | `time`, `ts` |
61
+ | `src_host` | `source_host`, `hostname` |
62
+ | `src_ip` | `source_ip` |
63
+ | `user` | `username` |
64
+ | `process_name` | `process`, `proc_name` |
65
+ | `dest_host` | `dest_domain`, `destination_domain`, `remote_hostname` |
66
+ | `dest_ip` | `destination_ip`, `remote_ip` |
67
+ | `dest_port` | `destination_port`, `remote_port` |
68
+ | `bytes_sent` | `bytes_out`, `tx_bytes` |
69
+
70
+ Only `dest_host`/its aliases are matched against the AI service catalog —
71
+ shadow-sniffer does not perform DNS or reverse-DNS lookups.
72
+
73
+ ## Approved-services allowlist
74
+
75
+ A JSON file with an `approved_domains` list, or a plain-text file with one
76
+ domain per line (`#` comments allowed). Domains are matched by suffix, so
77
+ `openai.com` in your allowlist covers both `api.openai.com` and
78
+ `chat.openai.com`.
79
+
80
+ ## Console commands
81
+
82
+ - `shadow-sniffer scan --input <log> [--approved <allowlist>] [--output results.json]` —
83
+ scan a connection log, print findings as a table, exit non-zero if any
84
+ unapproved AI service usage is found (usable as a CI gate).
85
+ - `shadow-sniffer list-services` — print the full built-in AI service catalog.
86
+
87
+ ## AegisTrace integration
88
+
89
+ ```bash
90
+ shadow-sniffer scan --input connections.json \
91
+ --aegistrace-url https://your-aegistrace-instance \
92
+ --aegistrace-key $AEGISTRACE_INGEST_KEY
93
+ ```
94
+
95
+ Each finding is POSTed to `/api/ingest/shadowsniffer-event`, creating an
96
+ `AgentAction` entry visible in AegisTrace's `/app/agent-security` dashboard.
97
+
98
+ ## Testing
99
+
100
+ ```bash
101
+ pip install -e ".[dev]"
102
+ pytest
103
+ ```
104
+
105
+ ## Companion projects
106
+
107
+ - [mcp-sploit](https://github.com/Prasanna-27eng/mcp-sploit) — dynamic
108
+ security testing for MCP servers (attacks the *tools* layer)
109
+ - [prompt-fuzz](https://github.com/Prasanna-27eng/prompt-fuzz) — jailbreak
110
+ and prompt-injection fuzzer for LLM endpoints (attacks the *brain* layer)
111
+ - [nhi-hunter](https://github.com/Prasanna-27eng/nhi-hunter) — AWS IAM
112
+ privilege-escalation graph builder (attacks the *identity* layer)
113
+
114
+ ## License
115
+
116
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,5 @@
1
+ {
2
+ "approved_domains": [
3
+ "api.anthropic.com"
4
+ ]
5
+ }
@@ -0,0 +1,92 @@
1
+ {
2
+ "connections": [
3
+ {
4
+ "timestamp": "2026-06-10T09:01:00Z",
5
+ "src_host": "dev-laptop-01",
6
+ "src_ip": "10.0.1.15",
7
+ "user": "alice",
8
+ "process_name": "git",
9
+ "dest_host": "github.com",
10
+ "dest_ip": "140.82.112.3",
11
+ "dest_port": 443,
12
+ "bytes_sent": 4096
13
+ },
14
+ {
15
+ "timestamp": "2026-06-10T09:05:12Z",
16
+ "src_host": "dev-laptop-01",
17
+ "src_ip": "10.0.1.15",
18
+ "user": "alice",
19
+ "process_name": "python3",
20
+ "dest_host": "api.openai.com",
21
+ "dest_ip": "104.18.10.1",
22
+ "dest_port": 443,
23
+ "bytes_sent": 182340
24
+ },
25
+ {
26
+ "timestamp": "2026-06-10T09:12:45Z",
27
+ "src_host": "dev-laptop-02",
28
+ "src_ip": "10.0.1.22",
29
+ "user": "bob",
30
+ "process_name": "chrome",
31
+ "dest_host": "claude.ai",
32
+ "dest_ip": "104.18.20.5",
33
+ "dest_port": 443,
34
+ "bytes_sent": 51200
35
+ },
36
+ {
37
+ "timestamp": "2026-06-10T09:14:03Z",
38
+ "src_host": "dev-laptop-02",
39
+ "src_ip": "10.0.1.22",
40
+ "user": "bob",
41
+ "process_name": "chrome",
42
+ "dest_host": "chat.openai.com",
43
+ "dest_ip": "104.18.30.9",
44
+ "dest_port": 443,
45
+ "bytes_sent": 73400
46
+ },
47
+ {
48
+ "timestamp": "2026-06-10T09:20:30Z",
49
+ "src_host": "ci-runner-03",
50
+ "src_ip": "10.0.2.5",
51
+ "user": "svc-ci",
52
+ "process_name": "curl",
53
+ "dest_host": "api.anthropic.com",
54
+ "dest_ip": "104.18.40.2",
55
+ "dest_port": 443,
56
+ "bytes_sent": 9821
57
+ },
58
+ {
59
+ "timestamp": "2026-06-10T09:25:00Z",
60
+ "src_host": "dev-laptop-03",
61
+ "src_ip": "10.0.1.40",
62
+ "user": "carol",
63
+ "process_name": "code",
64
+ "dest_host": "api.githubcopilot.com",
65
+ "dest_ip": "140.82.113.21",
66
+ "dest_port": 443,
67
+ "bytes_sent": 12044
68
+ },
69
+ {
70
+ "timestamp": "2026-06-10T09:30:18Z",
71
+ "src_host": "dev-laptop-03",
72
+ "src_ip": "10.0.1.40",
73
+ "user": "carol",
74
+ "process_name": "node",
75
+ "dest_host": "internal-api.corp.local",
76
+ "dest_ip": "10.0.0.50",
77
+ "dest_port": 8080,
78
+ "bytes_sent": 2048
79
+ },
80
+ {
81
+ "timestamp": "2026-06-10T09:35:55Z",
82
+ "src_host": "dev-laptop-04",
83
+ "src_ip": "10.0.1.55",
84
+ "user": "dave",
85
+ "process_name": "chrome",
86
+ "dest_host": "midjourney.com",
87
+ "dest_ip": "104.18.50.7",
88
+ "dest_port": 443,
89
+ "bytes_sent": 304021
90
+ }
91
+ ]
92
+ }
@@ -0,0 +1,52 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "shadow-sniffer"
7
+ version = "0.1.0"
8
+ description = "Shadow AI detector — scans network connection logs for unsanctioned AI API/service usage and cross-references against an approved-services allowlist."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "Prasanna Kumar Surendran" }]
13
+ keywords = [
14
+ "shadow-ai",
15
+ "ai-security",
16
+ "data-exfiltration",
17
+ "network-security",
18
+ "saas-security",
19
+ "purple-team",
20
+ "blue-team",
21
+ "cspm",
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 4 - Beta",
25
+ "Intended Audience :: Information Technology",
26
+ "Topic :: Security",
27
+ "License :: OSI Approved :: MIT License",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Programming Language :: Python :: 3.12",
32
+ ]
33
+ dependencies = [
34
+ "typer>=0.12",
35
+ "rich>=13.0",
36
+ "httpx>=0.27",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = [
41
+ "pytest>=8.0",
42
+ ]
43
+
44
+ [project.scripts]
45
+ shadow-sniffer = "shadow_sniffer.cli:app"
46
+
47
+ [project.urls]
48
+ Homepage = "https://github.com/Prasanna-27eng/shadow-sniffer"
49
+ Repository = "https://github.com/Prasanna-27eng/shadow-sniffer"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/shadow_sniffer"]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,109 @@
1
+ """Built-in catalog of known third-party AI service domains.
2
+
3
+ Each entry maps a single hostname to the AI product it belongs to. Matching
4
+ is suffix-based (traffic to ``foo.api.openai.com`` matches ``api.openai.com``)
5
+ so a single catalog row covers an entire subdomain tree.
6
+
7
+ This catalog is intentionally curated and small enough to read end-to-end —
8
+ it is a starting point for an organization's own allow/deny lists, not an
9
+ exhaustive registry. Contributions of new services are welcome (see
10
+ CONTRIBUTING.md).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+
17
+
18
@dataclass(frozen=True)
class AIService:
    """Immutable catalog record tying one hostname to an AI product."""

    # Human-readable product name, e.g. "ChatGPT".
    name: str
    # Hostname this record stands for, e.g. "chat.openai.com".
    domain: str
    # Category label, e.g. "LLM Chat".
    category: str
25
+
26
+
27
+ # Categories used across the catalog:
28
+ # "LLM Chat" - consumer-facing chat web apps
29
+ # "LLM API" - developer/programmatic model endpoints
30
+ # "Code Assistant" - AI pair-programming / IDE integrations
31
+ # "Image Generation" - text-to-image / image editing
32
+ # "Video Generation" - text-to-video / video editing
33
+ # "Voice & Audio" - text-to-speech, voice cloning, transcription
34
+ # "Search & Research" - AI-augmented search/answer engines
35
+ # "Agent & Automation" - hosted agent/workflow platforms
36
+
37
# Each row below is (name, domain, category) and becomes one AIService, in
# the order written.
AI_SERVICE_CATALOG: list[AIService] = [
    AIService(*row)
    for row in (
        # --- LLM Chat (consumer web apps) ---
        ("ChatGPT", "chat.openai.com", "LLM Chat"),
        ("ChatGPT", "chatgpt.com", "LLM Chat"),
        ("Claude", "claude.ai", "LLM Chat"),
        ("Google Gemini", "gemini.google.com", "LLM Chat"),
        ("Microsoft Copilot", "copilot.microsoft.com", "LLM Chat"),
        ("Perplexity", "perplexity.ai", "LLM Chat"),
        ("Poe", "poe.com", "LLM Chat"),
        ("Character.AI", "character.ai", "LLM Chat"),
        ("You.com", "you.com", "LLM Chat"),
        ("DeepSeek Chat", "chat.deepseek.com", "LLM Chat"),
        ("Grok", "grok.com", "LLM Chat"),

        # --- LLM API (developer/programmatic endpoints) ---
        ("OpenAI API", "api.openai.com", "LLM API"),
        ("Anthropic API", "api.anthropic.com", "LLM API"),
        ("Google Gemini API", "generativelanguage.googleapis.com", "LLM API"),
        ("Mistral AI API", "api.mistral.ai", "LLM API"),
        ("Groq API", "api.groq.com", "LLM API"),
        ("Cohere API", "api.cohere.ai", "LLM API"),
        ("Hugging Face", "huggingface.co", "LLM API"),
        ("Hugging Face Inference API", "api-inference.huggingface.co", "LLM API"),
        ("Together AI API", "api.together.xyz", "LLM API"),
        ("Perplexity API", "api.perplexity.ai", "LLM API"),
        ("Replicate API", "api.replicate.com", "LLM API"),
        ("Cerebras Inference API", "inference.cerebras.ai", "LLM API"),
        ("DeepSeek API", "api.deepseek.com", "LLM API"),
        ("OpenRouter", "openrouter.ai", "LLM API"),
        ("xAI API", "api.x.ai", "LLM API"),

        # --- Code Assistant ---
        ("GitHub Copilot", "api.githubcopilot.com", "Code Assistant"),
        ("GitHub Copilot", "copilot-proxy.githubusercontent.com", "Code Assistant"),
        ("Codeium", "codeium.com", "Code Assistant"),
        ("Codeium API", "api.codeium.com", "Code Assistant"),
        ("Cursor", "api2.cursor.sh", "Code Assistant"),
        ("Tabnine", "api.tabnine.com", "Code Assistant"),

        # --- Image Generation ---
        ("Stability AI API", "api.stability.ai", "Image Generation"),
        ("Midjourney", "midjourney.com", "Image Generation"),
        ("Leonardo.AI", "leonardo.ai", "Image Generation"),

        # --- Video Generation ---
        ("Runway", "runwayml.com", "Video Generation"),

        # --- Voice & Audio ---
        ("ElevenLabs", "elevenlabs.io", "Voice & Audio"),
        ("ElevenLabs API", "api.elevenlabs.io", "Voice & Audio"),
        ("PlayHT", "play.ht", "Voice & Audio"),
    )
]
89
+
90
+
91
def match_domain(
    hostname: str,
    catalog: list[AIService] | None = None,
) -> AIService | None:
    """Return the most specific catalog entry matching ``hostname``, or ``None``.

    Matching is suffix-based and case-insensitive: ``"foo.api.openai.com"``
    and ``"api.openai.com"`` both match the ``api.openai.com`` entry. When
    several entries match (e.g. ``perplexity.ai`` and ``api.perplexity.ai``
    for the host ``api.perplexity.ai``), the entry with the longest domain
    wins, so a general entry never hides a more specific one regardless of
    the order in which they are listed. Entries with equally long domains
    keep their listing order (the first one is returned).

    ``hostname`` is the name to look up; surrounding whitespace and a
    trailing dot are ignored. ``catalog`` is an optional list of entries to
    search instead of the built-in ``AI_SERVICE_CATALOG``.
    """
    if not hostname:
        return None
    entries = AI_SERVICE_CATALOG if catalog is None else catalog
    host = hostname.strip().lower().rstrip(".")

    best_match = None
    best_length = -1
    for service in entries:
        domain = service.domain.lower()
        if host != domain and not host.endswith("." + domain):
            continue
        # A longer matching domain is a more specific match; strict ">"
        # keeps the earliest entry when two domains have the same length.
        if len(domain) > best_length:
            best_match = service
            best_length = len(domain)
    return best_match
105
+
106
+
107
def categories() -> list[str]:
    """List each distinct category used by a catalog entry, sorted ascending."""
    distinct: set[str] = set()
    for entry in AI_SERVICE_CATALOG:
        distinct.add(entry.category)
    return sorted(distinct)