PyPI - shadow-sniffer - Versions diffs - 0.1.0__tar.gz - Mend

shadow-sniffer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

shadow_sniffer-0.1.0/.github/workflows/ci.yml +20 -0
shadow_sniffer-0.1.0/.gitignore +9 -0
shadow_sniffer-0.1.0/CITATION.cff +26 -0
shadow_sniffer-0.1.0/CONTRIBUTING.md +47 -0
shadow_sniffer-0.1.0/LICENSE +21 -0
shadow_sniffer-0.1.0/PKG-INFO +142 -0
shadow_sniffer-0.1.0/README.md +116 -0
shadow_sniffer-0.1.0/examples/approved_services.json +5 -0
shadow_sniffer-0.1.0/examples/sample_connections.json +92 -0
shadow_sniffer-0.1.0/pyproject.toml +52 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/__init__.py +1 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/catalog.py +109 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/cli.py +132 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/engine.py +159 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/parsers.py +131 -0
shadow_sniffer-0.1.0/src/shadow_sniffer/reporter.py +50 -0
shadow_sniffer-0.1.0/tests/fixtures/approved_services.json +5 -0
shadow_sniffer-0.1.0/tests/fixtures/clean_connections.json +37 -0
shadow_sniffer-0.1.0/tests/fixtures/sample_connections.csv +4 -0
shadow_sniffer-0.1.0/tests/fixtures/sample_connections.json +92 -0
shadow_sniffer-0.1.0/tests/test_catalog.py +46 -0
shadow_sniffer-0.1.0/tests/test_cli.py +70 -0
shadow_sniffer-0.1.0/tests/test_engine.py +76 -0
shadow_sniffer-0.1.0/tests/test_parsers.py +57 -0

shadow_sniffer-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,20 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - run: pip install -e ".[dev]"
+      - run: pytest

shadow_sniffer-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,9 @@
+__pycache__/
+*.pyc
+.venv/
+venv/
+*.egg-info/
+build/
+dist/
+.pytest_cache/
+.DS_Store

shadow_sniffer-0.1.0/CITATION.cff ADDED Viewed

@@ -0,0 +1,26 @@
+cff-version: 1.2.0
+message: "If you use this software in your research, please cite it as below."
+title: "shadow-sniffer: Shadow AI detector for network connection logs"
+abstract: >
+  A CLI tool that scans a local network connection log (JSON or CSV) against
+  a curated catalog of known third-party AI service domains, cross-references
+  matches against an approved-services allowlist, and reports unsanctioned
+  AI usage (Shadow AI) tagged with MITRE ATT&CK T1567 (Exfiltration Over Web
+  Service).
+authors:
+  - family-names: "Surendran"
+    given-names: "Prasanna Kumar"
+repository-code: "https://github.com/Prasanna-27eng/shadow-sniffer"
+url: "https://github.com/Prasanna-27eng/shadow-sniffer"
+license: MIT
+version: 0.1.0
+date-released: "2026-06-11"
+keywords:
+  - "shadow ai"
+  - "ai security"
+  - "data exfiltration"
+  - "network security"
+  - "saas security"
+  - "purple team"
+  - "blue team"
+  - "cspm"

shadow_sniffer-0.1.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,47 @@
+# Contributing to shadow-sniffer
+Contributions are welcome — especially new catalog entries and connection
+log format support.
+## Adding an AI service to the catalog
+`src/shadow_sniffer/catalog.py` holds `AI_SERVICE_CATALOG`, a list of
+`AIService(name, domain, category)` entries. Matching is suffix-based, so
+add the most specific hostname that's actually contacted (e.g.
+`api.openai.com`, not `openai.com`, if the consumer chat app uses a
+different domain).
+When adding a service:
+- Use one of the existing categories where it fits (`LLM Chat`, `LLM API`,
+  `Code Assistant`, `Image Generation`, `Video Generation`, `Voice & Audio`,
+  `Search & Research`, `Agent & Automation`), or propose a new one if none
+  fit.
+- Add a test in `tests/test_catalog.py` confirming `match_domain()` resolves
+  the new hostname (and any subdomain you expect to see in real traffic).
+## Adding a connection log format
+`src/shadow_sniffer/parsers.py` currently supports JSON and CSV via
+`parse_log()`. To add a new format (e.g. Zeek `conn.log`, a specific proxy
+export):
+- Add a `parse_<format>_log(path) -> list[ConnectionRecord]` function that
+  maps the format's fields onto `ConnectionRecord` (use `_FIELD_ALIASES` /
+  `_to_record` if the format is dict-like).
+- Wire it into `parse_log()`'s extension dispatch.
+- Add a fixture under `tests/fixtures/` and cover it in `tests/test_parsers.py`.
+## Running tests
+```bash
+pip install -e ".[dev]"
+pytest
+```
+## Reporting a vulnerability in shadow-sniffer itself
+shadow-sniffer only reads local connection-log files and never makes
+network connections of its own (other than the optional AegisTrace
+reporting POST). If you find a bug that could cause it to be misused beyond
+this documented scope, please open an issue describing the problem.

shadow_sniffer-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Prasanna Kumar Surendran
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

shadow_sniffer-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,142 @@
+Metadata-Version: 2.4
+Name: shadow-sniffer
+Version: 0.1.0
+Summary: Shadow AI detector — scans network connection logs for unsanctioned AI API/service usage and cross-references against an approved-services allowlist.
+Project-URL: Homepage, https://github.com/Prasanna-27eng/shadow-sniffer
+Project-URL: Repository, https://github.com/Prasanna-27eng/shadow-sniffer
+Author: Prasanna Kumar Surendran
+License-Expression: MIT
+License-File: LICENSE
+Keywords: ai-security,blue-team,cspm,data-exfiltration,network-security,purple-team,saas-security,shadow-ai
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Information Technology
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Security
+Requires-Python: >=3.10
+Requires-Dist: httpx>=0.27
+Requires-Dist: rich>=13.0
+Requires-Dist: typer>=0.12
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# shadow-sniffer
+[![PyPI](https://img.shields.io/pypi/v/shadow-sniffer.svg)](https://pypi.org/project/shadow-sniffer/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+> Shadow AI detector — scans network connection logs for unsanctioned AI
+> service usage and cross-references against an approved-services allowlist.
+**Educational / authorized-use tool.** shadow-sniffer reads a local export
+of outbound network connections — no live agent, packet capture, or network
+access required. Use it on your own logs, or on logs you have permission to
+analyze.
+## Why shadow-sniffer?
+Employees and AI agents routinely send data — source code, customer records,
+credentials, internal documents — to third-party AI chat apps and APIs that
+IT and security never approved. This is "Shadow AI": functionally the same
+risk as Shadow IT, but the exfiltration channel is a chat box or an API call
+that looks like normal HTTPS traffic.
+shadow-sniffer is the fourth tool in the **Grassroots Expansion Pack**
+(`mcp-sploit` → `prompt-fuzz` → `nhi-hunter` → `shadow-sniffer`). Where the
+others attack an AI deployment's *tools*, *brain*, and *identity layer*,
+shadow-sniffer looks at the *data layer* — where is data actually going.
+It ships with a curated catalog of 39 known AI service domains across 6
+categories (LLM Chat, LLM API, Code Assistant, Image Generation, Video
+Generation, and Voice & Audio), matches them against a connection log, and flags any
+hit that isn't on your organization's approved list. Findings are tagged
+**MITRE ATT&CK T1567 (Exfiltration Over Web Service)**.
+## Quick start
+```bash
+pip install shadow-sniffer
+shadow-sniffer scan --input connections.json --approved approved_services.json
+```
+## Try it without any setup
+```bash
+git clone https://github.com/Prasanna-27eng/shadow-sniffer
+cd shadow-sniffer
+pip install -e .
+shadow-sniffer scan \
+  --input examples/sample_connections.json \
+  --approved examples/approved_services.json
+```
+## Connection log format
+A JSON file with a `connections` list (or a bare list), or a CSV with a
+header row. Recognized fields (aliases in parentheses):
+| Field | Aliases |
+|---|---|
+| `timestamp` | `time`, `ts` |
+| `src_host` | `source_host`, `hostname` |
+| `src_ip` | `source_ip` |
+| `user` | `username` |
+| `process_name` | `process`, `proc_name` |
+| `dest_host` | `dest_domain`, `destination_domain`, `remote_hostname` |
+| `dest_ip` | `destination_ip`, `remote_ip` |
+| `dest_port` | `destination_port`, `remote_port` |
+| `bytes_sent` | `bytes_out`, `tx_bytes` |
+Only `dest_host` (or one of its aliases) is matched against the AI service
+catalog — shadow-sniffer does not perform DNS or reverse-DNS lookups.
+## Approved-services allowlist
+A JSON file with an `approved_domains` list, or a plain-text file with one
+domain per line (`#` comments allowed). Domains are matched by suffix, so
+`openai.com` in your allowlist covers both `api.openai.com` and
+`chat.openai.com`.
+## Console commands
+- `shadow-sniffer scan --input <log> [--approved <allowlist>] [--output results.json]` —
+  scan a connection log, print findings as a table, exit non-zero if any
+  unapproved AI service usage is found (usable as a CI gate).
+- `shadow-sniffer list-services` — print the full built-in AI service catalog.
+## AegisTrace integration
+```bash
+shadow-sniffer scan --input connections.json \
+  --aegistrace-url https://your-aegistrace-instance \
+  --aegistrace-key $AEGISTRACE_INGEST_KEY
+```
+Each finding is POSTed to `/api/ingest/shadowsniffer-event`, creating an
+`AgentAction` entry visible in AegisTrace's `/app/agent-security` dashboard.
+## Testing
+```bash
+pip install -e ".[dev]"
+pytest
+```
+## Companion projects
+- [mcp-sploit](https://github.com/Prasanna-27eng/mcp-sploit) — dynamic
+  security testing for MCP servers (attacks the *tools* layer)
+- [prompt-fuzz](https://github.com/Prasanna-27eng/prompt-fuzz) — jailbreak
+  and prompt-injection fuzzer for LLM endpoints (attacks the *brain* layer)
+- [nhi-hunter](https://github.com/Prasanna-27eng/nhi-hunter) — AWS IAM
+  privilege-escalation graph builder (attacks the *identity* layer)
+## License
+MIT — see [LICENSE](LICENSE).

shadow_sniffer-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,116 @@
+# shadow-sniffer
+[![PyPI](https://img.shields.io/pypi/v/shadow-sniffer.svg)](https://pypi.org/project/shadow-sniffer/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+> Shadow AI detector — scans network connection logs for unsanctioned AI
+> service usage and cross-references against an approved-services allowlist.
+**Educational / authorized-use tool.** shadow-sniffer reads a local export
+of outbound network connections — no live agent, packet capture, or network
+access required. Use it on your own logs, or on logs you have permission to
+analyze.
+## Why shadow-sniffer?
+Employees and AI agents routinely send data — source code, customer records,
+credentials, internal documents — to third-party AI chat apps and APIs that
+IT and security never approved. This is "Shadow AI": functionally the same
+risk as Shadow IT, but the exfiltration channel is a chat box or an API call
+that looks like normal HTTPS traffic.
+shadow-sniffer is the fourth tool in the **Grassroots Expansion Pack**
+(`mcp-sploit` → `prompt-fuzz` → `nhi-hunter` → `shadow-sniffer`). Where the
+others attack an AI deployment's *tools*, *brain*, and *identity layer*,
+shadow-sniffer looks at the *data layer* — where is data actually going.
+It ships with a curated catalog of 39 known AI service domains across 6
+categories (LLM Chat, LLM API, Code Assistant, Image Generation, Video
+Generation, and Voice & Audio), matches them against a connection log, and flags any
+hit that isn't on your organization's approved list. Findings are tagged
+**MITRE ATT&CK T1567 (Exfiltration Over Web Service)**.
+## Quick start
+```bash
+pip install shadow-sniffer
+shadow-sniffer scan --input connections.json --approved approved_services.json
+```
+## Try it without any setup
+```bash
+git clone https://github.com/Prasanna-27eng/shadow-sniffer
+cd shadow-sniffer
+pip install -e .
+shadow-sniffer scan \
+  --input examples/sample_connections.json \
+  --approved examples/approved_services.json
+```
+## Connection log format
+A JSON file with a `connections` list (or a bare list), or a CSV with a
+header row. Recognized fields (aliases in parentheses):
+| Field | Aliases |
+|---|---|
+| `timestamp` | `time`, `ts` |
+| `src_host` | `source_host`, `hostname` |
+| `src_ip` | `source_ip` |
+| `user` | `username` |
+| `process_name` | `process`, `proc_name` |
+| `dest_host` | `dest_domain`, `destination_domain`, `remote_hostname` |
+| `dest_ip` | `destination_ip`, `remote_ip` |
+| `dest_port` | `destination_port`, `remote_port` |
+| `bytes_sent` | `bytes_out`, `tx_bytes` |
+Only `dest_host` (or one of its aliases) is matched against the AI service
+catalog — shadow-sniffer does not perform DNS or reverse-DNS lookups.
+## Approved-services allowlist
+A JSON file with an `approved_domains` list, or a plain-text file with one
+domain per line (`#` comments allowed). Domains are matched by suffix, so
+`openai.com` in your allowlist covers both `api.openai.com` and
+`chat.openai.com`.
+## Console commands
+- `shadow-sniffer scan --input <log> [--approved <allowlist>] [--output results.json]` —
+  scan a connection log, print findings as a table, exit non-zero if any
+  unapproved AI service usage is found (usable as a CI gate).
+- `shadow-sniffer list-services` — print the full built-in AI service catalog.
+## AegisTrace integration
+```bash
+shadow-sniffer scan --input connections.json \
+  --aegistrace-url https://your-aegistrace-instance \
+  --aegistrace-key $AEGISTRACE_INGEST_KEY
+```
+Each finding is POSTed to `/api/ingest/shadowsniffer-event`, creating an
+`AgentAction` entry visible in AegisTrace's `/app/agent-security` dashboard.
+## Testing
+```bash
+pip install -e ".[dev]"
+pytest
+```
+## Companion projects
+- [mcp-sploit](https://github.com/Prasanna-27eng/mcp-sploit) — dynamic
+  security testing for MCP servers (attacks the *tools* layer)
+- [prompt-fuzz](https://github.com/Prasanna-27eng/prompt-fuzz) — jailbreak
+  and prompt-injection fuzzer for LLM endpoints (attacks the *brain* layer)
+- [nhi-hunter](https://github.com/Prasanna-27eng/nhi-hunter) — AWS IAM
+  privilege-escalation graph builder (attacks the *identity* layer)
+## License
+MIT — see [LICENSE](LICENSE).

shadow_sniffer-0.1.0/examples/approved_services.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "approved_domains": [
+    "api.anthropic.com"
+  ]
+}

shadow_sniffer-0.1.0/examples/sample_connections.json ADDED Viewed

@@ -0,0 +1,92 @@
+{
+  "connections": [
+    {
+      "timestamp": "2026-06-10T09:01:00Z",
+      "src_host": "dev-laptop-01",
+      "src_ip": "10.0.1.15",
+      "user": "alice",
+      "process_name": "git",
+      "dest_host": "github.com",
+      "dest_ip": "140.82.112.3",
+      "dest_port": 443,
+      "bytes_sent": 4096
+    },
+    {
+      "timestamp": "2026-06-10T09:05:12Z",
+      "src_host": "dev-laptop-01",
+      "src_ip": "10.0.1.15",
+      "user": "alice",
+      "process_name": "python3",
+      "dest_host": "api.openai.com",
+      "dest_ip": "104.18.10.1",
+      "dest_port": 443,
+      "bytes_sent": 182340
+    },
+    {
+      "timestamp": "2026-06-10T09:12:45Z",
+      "src_host": "dev-laptop-02",
+      "src_ip": "10.0.1.22",
+      "user": "bob",
+      "process_name": "chrome",
+      "dest_host": "claude.ai",
+      "dest_ip": "104.18.20.5",
+      "dest_port": 443,
+      "bytes_sent": 51200
+    },
+    {
+      "timestamp": "2026-06-10T09:14:03Z",
+      "src_host": "dev-laptop-02",
+      "src_ip": "10.0.1.22",
+      "user": "bob",
+      "process_name": "chrome",
+      "dest_host": "chat.openai.com",
+      "dest_ip": "104.18.30.9",
+      "dest_port": 443,
+      "bytes_sent": 73400
+    },
+    {
+      "timestamp": "2026-06-10T09:20:30Z",
+      "src_host": "ci-runner-03",
+      "src_ip": "10.0.2.5",
+      "user": "svc-ci",
+      "process_name": "curl",
+      "dest_host": "api.anthropic.com",
+      "dest_ip": "104.18.40.2",
+      "dest_port": 443,
+      "bytes_sent": 9821
+    },
+    {
+      "timestamp": "2026-06-10T09:25:00Z",
+      "src_host": "dev-laptop-03",
+      "src_ip": "10.0.1.40",
+      "user": "carol",
+      "process_name": "code",
+      "dest_host": "api.githubcopilot.com",
+      "dest_ip": "140.82.113.21",
+      "dest_port": 443,
+      "bytes_sent": 12044
+    },
+    {
+      "timestamp": "2026-06-10T09:30:18Z",
+      "src_host": "dev-laptop-03",
+      "src_ip": "10.0.1.40",
+      "user": "carol",
+      "process_name": "node",
+      "dest_host": "internal-api.corp.local",
+      "dest_ip": "10.0.0.50",
+      "dest_port": 8080,
+      "bytes_sent": 2048
+    },
+    {
+      "timestamp": "2026-06-10T09:35:55Z",
+      "src_host": "dev-laptop-04",
+      "src_ip": "10.0.1.55",
+      "user": "dave",
+      "process_name": "chrome",
+      "dest_host": "midjourney.com",
+      "dest_ip": "104.18.50.7",
+      "dest_port": 443,
+      "bytes_sent": 304021
+    }
+  ]
+}

shadow_sniffer-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,52 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "shadow-sniffer"
+version = "0.1.0"
+description = "Shadow AI detector — scans network connection logs for unsanctioned AI API/service usage and cross-references against an approved-services allowlist."
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+authors = [{ name = "Prasanna Kumar Surendran" }]
+keywords = [
+    "shadow-ai",
+    "ai-security",
+    "data-exfiltration",
+    "network-security",
+    "saas-security",
+    "purple-team",
+    "blue-team",
+    "cspm",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Information Technology",
+    "Topic :: Security",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "typer>=0.12",
+    "rich>=13.0",
+    "httpx>=0.27",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0",
+]
+[project.scripts]
+shadow-sniffer = "shadow_sniffer.cli:app"
+[project.urls]
+Homepage = "https://github.com/Prasanna-27eng/shadow-sniffer"
+Repository = "https://github.com/Prasanna-27eng/shadow-sniffer"
+[tool.hatch.build.targets.wheel]
+packages = ["src/shadow_sniffer"]

shadow_sniffer-0.1.0/src/shadow_sniffer/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.1.0"

shadow_sniffer-0.1.0/src/shadow_sniffer/catalog.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Built-in catalog of known third-party AI service domains.
+Each entry maps a single hostname to the AI product it belongs to. Matching
+is suffix-based (traffic to ``foo.api.openai.com`` matches ``api.openai.com``),
+so a single catalog row covers an entire subdomain tree.
+This catalog is intentionally curated and small enough to read end-to-end —
+it is a starting point for an organization's own allow/deny lists, not an
+exhaustive registry. Contributions of new services are welcome (see
+CONTRIBUTING.md).
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class AIService:
+    """One catalog row: a known AI-service hostname and the product it belongs to."""
+    name: str  # product name, e.g. "ChatGPT"
+    domain: str  # hostname that match_domain() compares against by suffix
+    category: str  # category label, e.g. "LLM Chat"
+# Categories defined for the catalog (the last two have no entries yet):
+#   "LLM Chat"           - consumer-facing chat web apps
+#   "LLM API"            - developer/programmatic model endpoints
+#   "Code Assistant"      - AI pair-programming / IDE integrations
+#   "Image Generation"    - text-to-image / image editing
+#   "Video Generation"    - text-to-video / video editing
+#   "Voice & Audio"       - text-to-speech, voice cloning, transcription
+#   "Search & Research"   - AI-augmented search/answer engines
+#   "Agent & Automation"  - hosted agent/workflow platforms
+AI_SERVICE_CATALOG: list[AIService] = [  # one row per hostname; a product may span several rows
+    # --- LLM Chat (consumer web apps) ---
+    AIService("ChatGPT", "chat.openai.com", "LLM Chat"),
+    AIService("ChatGPT", "chatgpt.com", "LLM Chat"),
+    AIService("Claude", "claude.ai", "LLM Chat"),
+    AIService("Google Gemini", "gemini.google.com", "LLM Chat"),
+    AIService("Microsoft Copilot", "copilot.microsoft.com", "LLM Chat"),
+    AIService("Perplexity", "perplexity.ai", "LLM Chat"),
+    AIService("Poe", "poe.com", "LLM Chat"),
+    AIService("Character.AI", "character.ai", "LLM Chat"),
+    AIService("You.com", "you.com", "LLM Chat"),
+    AIService("DeepSeek Chat", "chat.deepseek.com", "LLM Chat"),
+    AIService("Grok", "grok.com", "LLM Chat"),
+    # --- LLM API (developer/programmatic endpoints) ---
+    AIService("OpenAI API", "api.openai.com", "LLM API"),
+    AIService("Anthropic API", "api.anthropic.com", "LLM API"),
+    AIService("Google Gemini API", "generativelanguage.googleapis.com", "LLM API"),
+    AIService("Mistral AI API", "api.mistral.ai", "LLM API"),
+    AIService("Groq API", "api.groq.com", "LLM API"),
+    AIService("Cohere API", "api.cohere.ai", "LLM API"),
+    AIService("Hugging Face", "huggingface.co", "LLM API"),
+    AIService("Hugging Face Inference API", "api-inference.huggingface.co", "LLM API"),  # subdomain of the huggingface.co row above
+    AIService("Together AI API", "api.together.xyz", "LLM API"),
+    AIService("Perplexity API", "api.perplexity.ai", "LLM API"),  # subdomain of the perplexity.ai row (LLM Chat)
+    AIService("Replicate API", "api.replicate.com", "LLM API"),
+    AIService("Cerebras Inference API", "inference.cerebras.ai", "LLM API"),
+    AIService("DeepSeek API", "api.deepseek.com", "LLM API"),
+    AIService("OpenRouter", "openrouter.ai", "LLM API"),
+    AIService("xAI API", "api.x.ai", "LLM API"),
+    # --- Code Assistant ---
+    AIService("GitHub Copilot", "api.githubcopilot.com", "Code Assistant"),
+    AIService("GitHub Copilot", "copilot-proxy.githubusercontent.com", "Code Assistant"),
+    AIService("Codeium", "codeium.com", "Code Assistant"),
+    AIService("Codeium API", "api.codeium.com", "Code Assistant"),  # subdomain of the codeium.com row above
+    AIService("Cursor", "api2.cursor.sh", "Code Assistant"),
+    AIService("Tabnine", "api.tabnine.com", "Code Assistant"),
+    # --- Image Generation ---
+    AIService("Stability AI API", "api.stability.ai", "Image Generation"),
+    AIService("Midjourney", "midjourney.com", "Image Generation"),
+    AIService("Leonardo.AI", "leonardo.ai", "Image Generation"),
+    # --- Video Generation ---
+    AIService("Runway", "runwayml.com", "Video Generation"),
+    # --- Voice & Audio ---
+    AIService("ElevenLabs", "elevenlabs.io", "Voice & Audio"),
+    AIService("ElevenLabs API", "api.elevenlabs.io", "Voice & Audio"),  # subdomain of the elevenlabs.io row above
+    AIService("PlayHT", "play.ht", "Voice & Audio"),
+]
+def match_domain(hostname: str) -> AIService | None:
+    """Return the catalog entry matching ``hostname``, or ``None``.
+    Matching is suffix-based and case-insensitive: ``"foo.api.openai.com"``
+    and ``"api.openai.com"`` both match the ``api.openai.com`` entry.
+    """
+    if not hostname:
+        return None
+    hostname = hostname.lower().rstrip(".")
+    for service in AI_SERVICE_CATALOG:
+        domain = service.domain.lower()
+        if hostname == domain or hostname.endswith("." + domain):
+            return service
+    return None
+def categories() -> list[str]:
+    """Return the sorted set of categories present in the catalog."""
+    return sorted({service.category for service in AI_SERVICE_CATALOG})