PyPI - halyn - Versions diffs - 2.1.1__tar.gz → 2.1.2__tar.gz - Mend

halyn 2.1.1 (tar.gz) → 2.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

{halyn-2.1.1/src/halyn.egg-info → halyn-2.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: halyn
-Version: 2.1.1
+Version: 2.1.2
 Summary: Halyn — The governance layer for AI agents. Every action intercepted. Every decision auditable.
 Author-email: Elmadani SALKA <contact@halyn.dev>
 License: BSL-1.1
@@ -157,18 +157,84 @@ Deployed via `/etc/opt/chrome/policies/managed/halyn.json` — the agent cannot
 ---
-## Supported Agents
-| Agent | Type | Integration |
-|-------|------|-------------|
-| Claude Sonnet 4.6 / Opus 4.6 (Anthropic) | Cloud | Proxy + MCP |
-| GPT-4.1 / o3 (OpenAI) | Cloud | Proxy |
-| Gemini 3.1 Pro / Flash (Google) | Cloud | Proxy |
-| Ollama | Local | Direct |
-| LM Studio | Local | Direct |
-| Jan.ai | Local | Direct |
-| OpenClaw | Agentic | Interceptor |
-| Any MCP agent | Any | MCP server |
+## Compatible AI
+Halyn intercepts at the kernel and proxy level. It does not care which AI is running — it audits all of them equally. No AI is excluded.
+### How compatibility works
+Halyn intercepts three things:
+- **API calls** (iptables REDIRECT on port 443/80) — catches any HTTP request to any AI provider
+- **Filesystem access** (inotify/FSEvents/eBPF) — catches any agent touching files, regardless of origin
+- **Process syscalls** (eBPF, Linux ≥5.8) — catches any agent at the kernel level
+This means: if an AI agent makes an API call or accesses your system, Halyn sees it.
+### Cloud AI
+| Provider | Models (March 2026) | API |
+|----------|---------------------|-----|
+| **Anthropic** | Claude Sonnet 4.6, Claude Opus 4.6, Claude Haiku 4.5 | api.anthropic.com |
+| **OpenAI** | GPT-4.1, GPT-4.1 mini, GPT-4.1 nano, o3, o4-mini | api.openai.com |
+| **Google** | Gemini 3.1 Pro, Gemini 3.1 Flash, Gemini 3.1 Flash-Lite | generativelanguage.googleapis.com |
+| **Mistral AI** | Mistral Large 2, Mistral Small 3, Codestral | api.mistral.ai |
+| **xAI** | Grok-3, Grok-3 mini | api.x.ai |
+| **DeepSeek** | DeepSeek-V3, DeepSeek-R1 | api.deepseek.com |
+| **Cohere** | Command R+, Command R, Aya | api.cohere.com |
+| **Perplexity** | Sonar Pro, Sonar, Sonar Reasoning | api.perplexity.ai |
+| **01.AI** | Yi-Large, Yi-Vision | api.01.ai |
+| **Alibaba** | Qwen-Max, Qwen-Plus, Qwen-Turbo | dashscope.aliyuncs.com |
+| **Baidu** | ERNIE 4.5, ERNIE Speed | aip.baidubce.com |
+| **Amazon Bedrock** | Claude, Titan, Llama, Mistral (via AWS) | bedrock.amazonaws.com |
+| **Azure OpenAI** | GPT-4.1, o3 (via Microsoft) | *.openai.azure.com |
+| **NVIDIA NIM** | Llama 3.3, Mistral, DeepSeek-R1 (on NVIDIA cloud) | integrate.api.nvidia.com |
+| **Together AI** | 50+ open models via API | api.together.xyz |
+| **Groq** | Llama, Mixtral, Gemma (ultra-fast inference) | api.groq.com |
+| **Fireworks AI** | Llama, Mixtral, DeepSeek | api.fireworks.ai |
+### Local AI
+Any local model is compatible — Halyn intercepts at the process level, not the network level.
+| Runtime | Models | Notes |
+|---------|--------|-------|
+| **Ollama** | Llama 3.3, Qwen2.5, Mistral, DeepSeek-R1, Phi-4, Gemma 3, ... | OpenAI-compatible API |
+| **LM Studio** | Any GGUF model | OpenAI-compatible server |
+| **Jan.ai** | Any GGUF or ONNX model | Desktop + server mode |
+| **GPT4All** | Llama, Mistral, Phi variants | Offline, no telemetry |
+| **llama.cpp** | Any GGUF model directly | Server mode (`--server`) |
+| **LocalAI** | 100+ models, any GGUF | Drop-in OpenAI replacement |
+| **text-generation-webui** | Any HuggingFace model | Extension ecosystem |
+| **KoboldCpp** | Any GGUF model | Focus on creative writing |
+| **OpenWebUI** | Ollama + OpenAI frontend | Browser-based |
+| **AnythingLLM** | Multi-model workspace | Team-friendly |
+| **Xinference** | HuggingFace + GGUF | Enterprise local inference |
+| **vLLM** | HuggingFace models | High-throughput server |
+| **TGI (HuggingFace)** | HuggingFace models | Production inference |
+### Agentic frameworks
+Halyn intercepts any agentic system. The agent framework doesn't matter.
+| Framework | Notes |
+|-----------|-------|
+| **OpenClaw** | Full interceptor — every action audited |
+| **Claude Cowork** | Proxy + filesystem hooks |
+| **Claude Code** | Process-level monitoring |
+| **LangChain** | API calls intercepted automatically |
+| **LlamaIndex** | API calls intercepted automatically |
+| **AutoGen** | API calls intercepted automatically |
+| **CrewAI** | API calls intercepted automatically |
+| **Semantic Kernel** | API calls intercepted automatically |
+| **BeeQ** | Native AAP integration |
+| **Any MCP agent** | MCP server passthrough |
+| **Any A2A agent** | Network-level interception |
+| **Any OpenAI-compatible API** | Universal proxy compatibility |
+### The rule
+> If an AI touches your machine or calls an API — Halyn sees it.
+> No exception. No exclusion. That's the point.
 ---

{halyn-2.1.1 → halyn-2.1.2}/README.md RENAMED Viewed

@@ -124,18 +124,84 @@ Deployed via `/etc/opt/chrome/policies/managed/halyn.json` — the agent cannot
 ---
-## Supported Agents
-| Agent | Type | Integration |
-|-------|------|-------------|
-| Claude Sonnet 4.6 / Opus 4.6 (Anthropic) | Cloud | Proxy + MCP |
-| GPT-4.1 / o3 (OpenAI) | Cloud | Proxy |
-| Gemini 3.1 Pro / Flash (Google) | Cloud | Proxy |
-| Ollama | Local | Direct |
-| LM Studio | Local | Direct |
-| Jan.ai | Local | Direct |
-| OpenClaw | Agentic | Interceptor |
-| Any MCP agent | Any | MCP server |
+## Compatible AI
+Halyn intercepts at the kernel and proxy level. It does not care which AI is running — it audits all of them equally. No AI is excluded.
+### How compatibility works
+Halyn intercepts three things:
+- **API calls** (iptables REDIRECT on port 443/80) — catches any HTTP request to any AI provider
+- **Filesystem access** (inotify/FSEvents/eBPF) — catches any agent touching files, regardless of origin
+- **Process syscalls** (eBPF, Linux ≥5.8) — catches any agent at the kernel level
+This means: if an AI agent makes an API call or accesses your system, Halyn sees it.
+### Cloud AI
+| Provider | Models (March 2026) | API |
+|----------|---------------------|-----|
+| **Anthropic** | Claude Sonnet 4.6, Claude Opus 4.6, Claude Haiku 4.5 | api.anthropic.com |
+| **OpenAI** | GPT-4.1, GPT-4.1 mini, GPT-4.1 nano, o3, o4-mini | api.openai.com |
+| **Google** | Gemini 3.1 Pro, Gemini 3.1 Flash, Gemini 3.1 Flash-Lite | generativelanguage.googleapis.com |
+| **Mistral AI** | Mistral Large 2, Mistral Small 3, Codestral | api.mistral.ai |
+| **xAI** | Grok-3, Grok-3 mini | api.x.ai |
+| **DeepSeek** | DeepSeek-V3, DeepSeek-R1 | api.deepseek.com |
+| **Cohere** | Command R+, Command R, Aya | api.cohere.com |
+| **Perplexity** | Sonar Pro, Sonar, Sonar Reasoning | api.perplexity.ai |
+| **01.AI** | Yi-Large, Yi-Vision | api.01.ai |
+| **Alibaba** | Qwen-Max, Qwen-Plus, Qwen-Turbo | dashscope.aliyuncs.com |
+| **Baidu** | ERNIE 4.5, ERNIE Speed | aip.baidubce.com |
+| **Amazon Bedrock** | Claude, Titan, Llama, Mistral (via AWS) | bedrock.amazonaws.com |
+| **Azure OpenAI** | GPT-4.1, o3 (via Microsoft) | *.openai.azure.com |
+| **NVIDIA NIM** | Llama 3.3, Mistral, DeepSeek-R1 (on NVIDIA cloud) | integrate.api.nvidia.com |
+| **Together AI** | 50+ open models via API | api.together.xyz |
+| **Groq** | Llama, Mixtral, Gemma (ultra-fast inference) | api.groq.com |
+| **Fireworks AI** | Llama, Mixtral, DeepSeek | api.fireworks.ai |
+### Local AI
+Any local model is compatible — Halyn intercepts at the process level, not the network level.
+| Runtime | Models | Notes |
+|---------|--------|-------|
+| **Ollama** | Llama 3.3, Qwen2.5, Mistral, DeepSeek-R1, Phi-4, Gemma 3, ... | OpenAI-compatible API |
+| **LM Studio** | Any GGUF model | OpenAI-compatible server |
+| **Jan.ai** | Any GGUF or ONNX model | Desktop + server mode |
+| **GPT4All** | Llama, Mistral, Phi variants | Offline, no telemetry |
+| **llama.cpp** | Any GGUF model directly | Server mode (`--server`) |
+| **LocalAI** | 100+ models, any GGUF | Drop-in OpenAI replacement |
+| **text-generation-webui** | Any HuggingFace model | Extension ecosystem |
+| **KoboldCpp** | Any GGUF model | Focus on creative writing |
+| **OpenWebUI** | Ollama + OpenAI frontend | Browser-based |
+| **AnythingLLM** | Multi-model workspace | Team-friendly |
+| **Xinference** | HuggingFace + GGUF | Enterprise local inference |
+| **vLLM** | HuggingFace models | High-throughput server |
+| **TGI (HuggingFace)** | HuggingFace models | Production inference |
+### Agentic frameworks
+Halyn intercepts any agentic system. The agent framework doesn't matter.
+| Framework | Notes |
+|-----------|-------|
+| **OpenClaw** | Full interceptor — every action audited |
+| **Claude Cowork** | Proxy + filesystem hooks |
+| **Claude Code** | Process-level monitoring |
+| **LangChain** | API calls intercepted automatically |
+| **LlamaIndex** | API calls intercepted automatically |
+| **AutoGen** | API calls intercepted automatically |
+| **CrewAI** | API calls intercepted automatically |
+| **Semantic Kernel** | API calls intercepted automatically |
+| **BeeQ** | Native AAP integration |
+| **Any MCP agent** | MCP server passthrough |
+| **Any A2A agent** | Network-level interception |
+| **Any OpenAI-compatible API** | Universal proxy compatibility |
+### The rule
+> If an AI touches your machine or calls an API — Halyn sees it.
+> No exception. No exclusion. That's the point.
 ---

{halyn-2.1.1 → halyn-2.1.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "halyn"
-version = "2.1.1"
+version = "2.1.2"
 description = "Halyn — The governance layer for AI agents. Every action intercepted. Every decision auditable."
 requires-python = ">=3.10"
 license = {text = "BSL-1.1"}

{halyn-2.1.1 → halyn-2.1.2}/src/halyn/__init__.py RENAMED Viewed

@@ -9,7 +9,7 @@ Every action intercepted. Every decision auditable.
 The AI cannot bypass it.
 """
-__version__ = "2.1.1"
+__version__ = "2.1.2"
 __author__ = "Elmadani SALKA"
 __license__ = "BSL-1.1"
 __email__ = "contact@halyn.dev"

halyn-2.1.2/src/halyn/llm.py ADDED Viewed

@@ -0,0 +1,362 @@
+# Copyright (c) 2026 Elmadani SALKA
+# Licensed under BSL-1.1. See LICENSE file.
+# Commercial use requires a license — contact@halyn.dev
+"""
+LLM Connector — Multi-provider abstraction.
+Halyn is provider-agnostic. It monitors any AI, regardless of origin.
+This module provides connectors for direct LLM integration (optional).
+The proxy layer works independently — without any connector configured.
+Supported by the proxy (no connector needed):
+  Cloud: Anthropic, OpenAI, Google, Mistral, xAI, DeepSeek, Cohere,
+         Perplexity, 01.AI, Alibaba, Baidu, Amazon Bedrock, Azure OpenAI,
+         NVIDIA NIM, Together AI, Groq, Fireworks AI, and any HTTP API.
+  Local: Ollama, LM Studio, Jan.ai, GPT4All, llama.cpp, LocalAI,
+         text-generation-webui, KoboldCpp, vLLM, TGI, and any local server.
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import urllib.request
+from abc import ABC, abstractmethod
+from typing import Any
+# Module-level logger, named after this module's import path.
+log = logging.getLogger(__name__)
+# ─── Base ─────────────────────────────────────────────────────────────────────
+class LLMResponse:
+    """Plain container for the outcome of one completion call.
+    Attributes:
+        content: Text returned by the model.
+        model: Model identifier supplied by the caller (empty string by default).
+        usage: Usage dict supplied by the caller; ``{}`` when none (or an empty one) was given.
+    """
+    def __init__(self, content: str, model: str = "", usage: dict | None = None) -> None:
+        self.content, self.model = content, model
+        # Any falsy ``usage`` (None or an empty dict) is replaced by a fresh dict.
+        self.usage = usage if usage else {}
+class LLMConnector(ABC):
+    """Abstract interface that every LLM connector in this module implements."""
+    @abstractmethod
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Run one completion and return it as an ``LLMResponse``.
+        Args:
+            messages: Conversation turns; the concrete connectors in this
+                module read the ``role`` and ``content`` keys of each entry.
+            system: Optional system prompt; an empty string means none.
+            max_tokens: Generation limit forwarded to the backend.
+        """
+# ─── Cloud providers ──────────────────────────────────────────────────────────
+class AnthropicConnector(LLMConnector):
+    """Connector for the Anthropic Messages endpoint (``/v1/messages``).
+    The key is taken from ``api_key`` or, when that is empty, from the
+    ``ANTHROPIC_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "claude-sonnet-4-6") -> None:
+        self.api_key = api_key if api_key else os.environ.get("ANTHROPIC_API_KEY", "")
+        self.model = model
+        self.endpoint = "https://api.anthropic.com/v1/messages"
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Send ``messages`` and return the text of the first content block of the reply."""
+        request_body: dict[str, Any] = {"model": self.model, "max_tokens": max_tokens, "messages": messages}
+        if system:
+            # The system prompt is a top-level field and is only sent when non-empty.
+            request_body["system"] = system
+        auth_headers = {"x-api-key": self.api_key, "anthropic-version": "2023-06-01"}
+        reply = self._post(self.endpoint, request_body, auth_headers)
+        return LLMResponse(reply["content"][0]["text"], self.model, reply.get("usage"))
+    def _post(self, url: str, body: dict, headers: dict) -> dict:
+        """POST ``body`` as JSON to ``url`` with the extra ``headers``; return the decoded JSON reply."""
+        req = urllib.request.Request(url, data=json.dumps(body).encode(), method="POST")
+        req.add_header("Content-Type", "application/json")
+        for name, value in headers.items():
+            req.add_header(name, value)
+        # 60-second timeout on the HTTP call.
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            return json.loads(resp.read())
+class OpenAIConnector(LLMConnector):
+    """Connector for the OpenAI chat-completions API.
+    ``endpoint`` is the API base URL; requests go to ``{endpoint}/chat/completions``
+    with a ``Bearer`` authorization header, so any server exposing that same
+    route and auth scheme can be targeted by changing ``endpoint``.
+    The key falls back to the ``OPENAI_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "gpt-4.1", endpoint: str = "https://api.openai.com/v1") -> None:
+        self.api_key = api_key if api_key else os.environ.get("OPENAI_API_KEY", "")
+        self.model = model
+        # Trailing slashes are stripped so the route can be appended with a single "/".
+        self.endpoint = endpoint.rstrip("/")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Send the conversation and return the first choice's message content."""
+        if system:
+            prefix = [{"role": "system", "content": system}]
+        else:
+            prefix = []
+        request_body = {"model": self.model, "max_tokens": max_tokens, "messages": prefix + messages}
+        reply = self._post(f"{self.endpoint}/chat/completions", request_body)
+        text = reply["choices"][0]["message"]["content"]
+        return LLMResponse(text, self.model, reply.get("usage"))
+    def _post(self, url: str, body: dict) -> dict:
+        """POST ``body`` as JSON to ``url`` with Bearer auth; return the decoded JSON reply."""
+        req = urllib.request.Request(url, data=json.dumps(body).encode(), method="POST")
+        req.add_header("Content-Type", "application/json")
+        req.add_header("Authorization", f"Bearer {self.api_key}")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            return json.loads(resp.read())
+class GeminiConnector(LLMConnector):
+    """Google — Gemini 3.1 Pro, Gemini 3.1 Flash, Gemini 3.1 Flash-Lite.
+    Calls the ``generateContent`` method of the Generative Language API
+    (``v1beta``). The key is taken from ``api_key`` or, when that is empty,
+    from the ``GOOGLE_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "gemini-3.1-flash-lite-preview") -> None:
+        self.api_key = api_key or os.environ.get("GOOGLE_API_KEY", "")
+        self.model = model
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Send the conversation and return the first part of the first candidate.
+        Args:
+            messages: Turns with ``role`` and ``content`` keys; the
+                ``assistant`` role is renamed to ``model`` for this API.
+            system: Optional system prompt, sent as ``systemInstruction``.
+            max_tokens: Sent as ``generationConfig.maxOutputTokens``.
+        Returns:
+            ``LLMResponse`` whose ``usage`` is the reply's ``usageMetadata``
+            (empty dict when the reply has none).
+        """
+        url = (
+            "https://generativelanguage.googleapis.com/v1beta/models/"
+            f"{self.model}:generateContent"
+        )
+        contents = [{"role": m["role"].replace("assistant", "model"), "parts": [{"text": m["content"]}]}
+                    for m in messages]
+        body: dict[str, Any] = {
+            "contents": contents,
+            "generationConfig": {"maxOutputTokens": max_tokens},
+        }
+        if system:
+            body["systemInstruction"] = {"parts": [{"text": system}]}
+        payload = json.dumps(body).encode()
+        req = urllib.request.Request(url, data=payload, method="POST")
+        req.add_header("Content-Type", "application/json")
+        # Send the key as a header rather than a ``?key=`` query parameter so
+        # it does not end up in URLs recorded by proxies, logs or error objects.
+        req.add_header("x-goog-api-key", self.api_key)
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            data = json.loads(resp.read())
+        content = data["candidates"][0]["content"]["parts"][0]["text"]
+        return LLMResponse(content, self.model, data.get("usageMetadata"))
+class MistralConnector(LLMConnector):
+    """Mistral AI connector.
+    Delegates to an ``OpenAIConnector`` whose endpoint is
+    ``https://api.mistral.ai/v1``. The key falls back to the
+    ``MISTRAL_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "mistral-small-latest") -> None:
+        self.model = model
+        self.api_key = api_key if api_key else os.environ.get("MISTRAL_API_KEY", "")
+        self._oa = OpenAIConnector(api_key=api_key, model=model, endpoint="https://api.mistral.ai/v1")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Copy the current ``api_key`` onto the wrapped connector, then delegate to it."""
+        delegate = self._oa
+        # Re-applied on every call, so the key resolved in __init__ (or changed since) is the one used.
+        delegate.api_key = self.api_key
+        return delegate.complete(messages, system, max_tokens)
+class XAIConnector(LLMConnector):
+    """xAI — Grok-3, Grok-3 mini.
+    Delegates to an ``OpenAIConnector`` whose endpoint is
+    ``https://api.x.ai/v1``. The key falls back to the ``XAI_API_KEY``
+    environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "grok-3-mini") -> None:
+        self.api_key = api_key or os.environ.get("XAI_API_KEY", "")
+        # Expose the configured model like the other connectors do.
+        self.model = model
+        # Seed the delegate with the resolved key rather than the raw argument.
+        self._oa = OpenAIConnector(self.api_key, model, "https://api.x.ai/v1")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Copy the current ``api_key`` onto the wrapped connector, then delegate to it."""
+        self._oa.api_key = self.api_key
+        return self._oa.complete(messages, system, max_tokens)
+class DeepSeekConnector(LLMConnector):
+    """DeepSeek — DeepSeek-V3, DeepSeek-R1.
+    Delegates to an ``OpenAIConnector`` whose endpoint is
+    ``https://api.deepseek.com/v1``. The key falls back to the
+    ``DEEPSEEK_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "deepseek-chat") -> None:
+        self.api_key = api_key or os.environ.get("DEEPSEEK_API_KEY", "")
+        # Expose the configured model like the other connectors do.
+        self.model = model
+        # Seed the delegate with the resolved key rather than the raw argument.
+        self._oa = OpenAIConnector(self.api_key, model, "https://api.deepseek.com/v1")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Copy the current ``api_key`` onto the wrapped connector, then delegate to it."""
+        self._oa.api_key = self.api_key
+        return self._oa.complete(messages, system, max_tokens)
+class GroqConnector(LLMConnector):
+    """Groq — Llama, Mixtral, Gemma (ultra-fast inference).
+    Delegates to an ``OpenAIConnector`` whose endpoint is
+    ``https://api.groq.com/openai/v1``. The key falls back to the
+    ``GROQ_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "llama-3.3-70b-versatile") -> None:
+        self.api_key = api_key or os.environ.get("GROQ_API_KEY", "")
+        # Expose the configured model like the other connectors do.
+        self.model = model
+        # Seed the delegate with the resolved key rather than the raw argument.
+        self._oa = OpenAIConnector(self.api_key, model, "https://api.groq.com/openai/v1")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Copy the current ``api_key`` onto the wrapped connector, then delegate to it."""
+        self._oa.api_key = self.api_key
+        return self._oa.complete(messages, system, max_tokens)
+class PerplexityConnector(LLMConnector):
+    """Perplexity — Sonar Pro, Sonar, Sonar Reasoning.
+    Delegates to an ``OpenAIConnector`` whose endpoint is
+    ``https://api.perplexity.ai``. The key falls back to the
+    ``PERPLEXITY_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "sonar") -> None:
+        self.api_key = api_key or os.environ.get("PERPLEXITY_API_KEY", "")
+        # Expose the configured model like the other connectors do.
+        self.model = model
+        # Seed the delegate with the resolved key rather than the raw argument.
+        self._oa = OpenAIConnector(self.api_key, model, "https://api.perplexity.ai")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Copy the current ``api_key`` onto the wrapped connector, then delegate to it."""
+        self._oa.api_key = self.api_key
+        return self._oa.complete(messages, system, max_tokens)
+class CohereConnector(LLMConnector):
+    """Cohere — Command R+, Command R, Aya.
+    Calls the v2 chat endpoint (``https://api.cohere.com/v2/chat``). The key
+    falls back to the ``COHERE_API_KEY`` environment variable.
+    """
+    def __init__(self, api_key: str = "", model: str = "command-r-plus") -> None:
+        self.api_key = api_key or os.environ.get("COHERE_API_KEY", "")
+        self.model = model
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Send the conversation and return the first content item of the reply.
+        Args:
+            messages: Turns with ``role`` and ``content`` keys.
+            system: Optional system prompt; sent as a leading ``system``-role
+                message, which is how the v2 chat API accepts it.
+            max_tokens: Generation limit forwarded to the API.
+        Returns:
+            ``LLMResponse`` whose ``usage`` is the reply's ``usage`` object
+            (empty dict when the reply has none).
+        """
+        url = "https://api.cohere.com/v2/chat"
+        turns = [{"role": m["role"], "content": m["content"]} for m in messages]
+        if system:
+            # v2 has no top-level system field; the prompt travels inside ``messages``.
+            turns.insert(0, {"role": "system", "content": system})
+        body: dict[str, Any] = {
+            "model": self.model,
+            "messages": turns,
+            "max_tokens": max_tokens,
+        }
+        payload = json.dumps(body).encode()
+        req = urllib.request.Request(url, data=payload, method="POST")
+        req.add_header("Content-Type", "application/json")
+        req.add_header("Authorization", f"Bearer {self.api_key}")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            data = json.loads(resp.read())
+        content = data["message"]["content"][0]["text"]
+        return LLMResponse(content, self.model, data.get("usage"))
+# ─── Local providers ──────────────────────────────────────────────────────────
+class OllamaConnector(LLMConnector):
+    """Ollama — any local model: Llama 3.3, Qwen2.5, Mistral, DeepSeek-R1,
+    Phi-4, Gemma 3, and hundreds more. OpenAI-compatible API.
+    Delegates to an ``OpenAIConnector`` pointed at ``{host}/v1`` with the
+    placeholder key ``"ollama"``.
+    """
+    def __init__(self, model: str = "llama3.2", host: str = "http://localhost:11434") -> None:
+        self.model = model
+        # Strip trailing slashes so a host such as "http://localhost:11434/"
+        # does not produce a double slash before "v1".
+        self._oa = OpenAIConnector("ollama", model, f"{host.rstrip('/')}/v1")
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Forward the call unchanged to the wrapped ``OpenAIConnector``."""
+        return self._oa.complete(messages, system, max_tokens)
+class LocalAIConnector(LLMConnector):
+    """Connector for a locally hosted, OpenAI-compatible server (LocalAI,
+    LM Studio, Jan.ai, KoboldCpp, text-generation-webui, llama.cpp, vLLM, TGI).
+    ``host`` is handed to ``OpenAIConnector`` as its endpoint as-is, so
+    requests go to ``{host}/chat/completions``.
+    NOTE(review): unlike ``OllamaConnector``, no ``/v1`` segment is appended
+    here — include it in ``host`` if the server needs it; confirm whether
+    this difference is intended.
+    """
+    def __init__(self, model: str = "local-model", host: str = "http://localhost:8080", api_key: str = "local") -> None:
+        self._oa = OpenAIConnector(api_key=api_key, model=model, endpoint=host)
+        self.model = model
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Forward the call unchanged to the wrapped ``OpenAIConnector``."""
+        delegate = self._oa
+        return delegate.complete(messages, system, max_tokens)
+class HuggingFaceConnector(LLMConnector):
+    """Connector that runs a model in-process through a ``transformers``
+    ``text-generation`` pipeline. The pipeline is built on first use.
+    """
+    def __init__(self, model: str = "microsoft/Phi-4") -> None:
+        self._pipeline: Any = None  # created by _load() on first use
+        self.model_name = model
+    def _load(self) -> None:
+        """Build the pipeline once; later calls return immediately.
+        Raises:
+            ImportError: If ``transformers`` cannot be imported.
+        """
+        if self._pipeline is not None:
+            return
+        try:
+            from transformers import pipeline  # type: ignore
+        except ImportError as exc:
+            raise ImportError("pip install transformers torch") from exc
+        log.info("llm.loading model=%s", self.model_name)
+        self._pipeline = pipeline("text-generation", model=self.model_name, device_map="auto")
+        log.info("llm.loaded model=%s", self.model_name)
+    def complete(self, messages: list[dict], system: str = "", max_tokens: int = 1024) -> LLMResponse:
+        """Flatten the conversation into ``role: content`` lines, generate, and
+        return the generated text with the prompt prefix cut off.
+        """
+        self._load()
+        transcript = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+        prompt = f"system: {system}\n{transcript}" if system else transcript
+        outputs = self._pipeline(prompt, max_new_tokens=max_tokens, do_sample=False)
+        # Drop the first len(prompt) characters of the generated text, then trim whitespace.
+        generated = outputs[0]["generated_text"]
+        completion = generated[len(prompt):].strip()
+        return LLMResponse(completion, self.model_name)
+# ─── Factory ──────────────────────────────────────────────────────────────────
+def create_connector(provider: str, **kwargs: Any) -> LLMConnector:
+    """Instantiate the connector registered under ``provider``.
+    The name is lower-cased and looked up in the registry below; all
+    remaining keyword arguments are forwarded unchanged to the connector's
+    constructor.
+    Args:
+        provider: Provider name or alias (case-insensitive).
+        **kwargs: Constructor arguments for the selected connector class.
+    Returns:
+        A new connector instance.
+    Raises:
+        ValueError: If ``provider`` matches no registered name.
+    """
+    # Each connector class is paired with every provider name that selects it.
+    registry = (
+        (AnthropicConnector, ("anthropic", "claude")),
+        # OpenAI itself, plus services addressed through the same connector
+        # (bedrock: via OpenAI-compat gateway).
+        (OpenAIConnector, ("openai", "gpt", "azure", "nvidia", "together", "fireworks", "bedrock")),
+        (GeminiConnector, ("google", "gemini")),
+        (MistralConnector, ("mistral",)),
+        (XAIConnector, ("xai", "grok")),
+        (DeepSeekConnector, ("deepseek",)),
+        (GroqConnector, ("groq",)),
+        (PerplexityConnector, ("perplexity", "sonar")),
+        (CohereConnector, ("cohere", "command")),
+        (OllamaConnector, ("ollama",)),
+        # Local OpenAI-compatible servers.
+        (LocalAIConnector, (
+            "lmstudio", "jan", "janai", "localai", "llamacpp", "llama.cpp",
+            "kobold", "koboldcpp", "vllm", "tgi", "gpt4all", "openwebui",
+            "local", "openai-compatible",
+        )),
+        (HuggingFaceConnector, ("huggingface", "hf", "transformers")),
+    )
+    by_name = {name: connector for connector, names in registry for name in names}
+    chosen = by_name.get(provider.lower())
+    if chosen is not None:
+        return chosen(**kwargs)
+    raise ValueError(
+        f"Unknown provider '{provider}'. "
+        f"Note: the Halyn proxy works with ANY AI regardless of this connector."
+    )

{halyn-2.1.1 → halyn-2.1.2/src/halyn.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: halyn
-Version: 2.1.1
+Version: 2.1.2
 Summary: Halyn — The governance layer for AI agents. Every action intercepted. Every decision auditable.
 Author-email: Elmadani SALKA <contact@halyn.dev>
 License: BSL-1.1
@@ -157,18 +157,84 @@ Deployed via `/etc/opt/chrome/policies/managed/halyn.json` — the agent cannot
 ---
-## Supported Agents
-| Agent | Type | Integration |
-|-------|------|-------------|
-| Claude Sonnet 4.6 / Opus 4.6 (Anthropic) | Cloud | Proxy + MCP |
-| GPT-4.1 / o3 (OpenAI) | Cloud | Proxy |
-| Gemini 3.1 Pro / Flash (Google) | Cloud | Proxy |
-| Ollama | Local | Direct |
-| LM Studio | Local | Direct |
-| Jan.ai | Local | Direct |
-| OpenClaw | Agentic | Interceptor |
-| Any MCP agent | Any | MCP server |
+## Compatible AI
+Halyn intercepts at the kernel and proxy level. It does not care which AI is running — it audits all of them equally. No AI is excluded.
+### How compatibility works
+Halyn intercepts three things:
+- **API calls** (iptables REDIRECT on port 443/80) — catches any HTTP request to any AI provider
+- **Filesystem access** (inotify/FSEvents/eBPF) — catches any agent touching files, regardless of origin
+- **Process syscalls** (eBPF, Linux ≥5.8) — catches any agent at the kernel level
+This means: if an AI agent makes an API call or accesses your system, Halyn sees it.
+### Cloud AI
+| Provider | Models (March 2026) | API |
+|----------|---------------------|-----|
+| **Anthropic** | Claude Sonnet 4.6, Claude Opus 4.6, Claude Haiku 4.5 | api.anthropic.com |
+| **OpenAI** | GPT-4.1, GPT-4.1 mini, GPT-4.1 nano, o3, o4-mini | api.openai.com |
+| **Google** | Gemini 3.1 Pro, Gemini 3.1 Flash, Gemini 3.1 Flash-Lite | generativelanguage.googleapis.com |
+| **Mistral AI** | Mistral Large 2, Mistral Small 3, Codestral | api.mistral.ai |
+| **xAI** | Grok-3, Grok-3 mini | api.x.ai |
+| **DeepSeek** | DeepSeek-V3, DeepSeek-R1 | api.deepseek.com |
+| **Cohere** | Command R+, Command R, Aya | api.cohere.com |
+| **Perplexity** | Sonar Pro, Sonar, Sonar Reasoning | api.perplexity.ai |
+| **01.AI** | Yi-Large, Yi-Vision | api.01.ai |
+| **Alibaba** | Qwen-Max, Qwen-Plus, Qwen-Turbo | dashscope.aliyuncs.com |
+| **Baidu** | ERNIE 4.5, ERNIE Speed | aip.baidubce.com |
+| **Amazon Bedrock** | Claude, Titan, Llama, Mistral (via AWS) | bedrock.amazonaws.com |
+| **Azure OpenAI** | GPT-4.1, o3 (via Microsoft) | *.openai.azure.com |
+| **NVIDIA NIM** | Llama 3.3, Mistral, DeepSeek-R1 (on NVIDIA cloud) | integrate.api.nvidia.com |
+| **Together AI** | 50+ open models via API | api.together.xyz |
+| **Groq** | Llama, Mixtral, Gemma (ultra-fast inference) | api.groq.com |
+| **Fireworks AI** | Llama, Mixtral, DeepSeek | api.fireworks.ai |
+### Local AI
+Any local model is compatible — Halyn intercepts at the process level, not the network level.
+| Runtime | Models | Notes |
+|---------|--------|-------|
+| **Ollama** | Llama 3.3, Qwen2.5, Mistral, DeepSeek-R1, Phi-4, Gemma 3, ... | OpenAI-compatible API |
+| **LM Studio** | Any GGUF model | OpenAI-compatible server |
+| **Jan.ai** | Any GGUF or ONNX model | Desktop + server mode |
+| **GPT4All** | Llama, Mistral, Phi variants | Offline, no telemetry |
+| **llama.cpp** | Any GGUF model directly | Server mode (`--server`) |
+| **LocalAI** | 100+ models, any GGUF | Drop-in OpenAI replacement |
+| **text-generation-webui** | Any HuggingFace model | Extension ecosystem |
+| **KoboldCpp** | Any GGUF model | Focus on creative writing |
+| **OpenWebUI** | Ollama + OpenAI frontend | Browser-based |
+| **AnythingLLM** | Multi-model workspace | Team-friendly |
+| **Xinference** | HuggingFace + GGUF | Enterprise local inference |
+| **vLLM** | HuggingFace models | High-throughput server |
+| **TGI (HuggingFace)** | HuggingFace models | Production inference |
+### Agentic frameworks
+Halyn intercepts any agentic system. The agent framework doesn't matter.
+| Framework | Notes |
+|-----------|-------|
+| **OpenClaw** | Full interceptor — every action audited |
+| **Claude Cowork** | Proxy + filesystem hooks |
+| **Claude Code** | Process-level monitoring |
+| **LangChain** | API calls intercepted automatically |
+| **LlamaIndex** | API calls intercepted automatically |
+| **AutoGen** | API calls intercepted automatically |
+| **CrewAI** | API calls intercepted automatically |
+| **Semantic Kernel** | API calls intercepted automatically |
+| **BeeQ** | Native AAP integration |
+| **Any MCP agent** | MCP server passthrough |
+| **Any A2A agent** | Network-level interception |
+| **Any OpenAI-compatible API** | Universal proxy compatibility |
+### The rule
+> If an AI touches your machine or calls an API — Halyn sees it.
+> No exception. No exclusion. That's the point.
 ---

halyn-2.1.1/src/halyn/llm.py DELETED Viewed

@@ -1,180 +0,0 @@
-# Copyright (c) 2026 Elmadani SALKA
-# Licensed under BSL-1.1. See LICENSE file.
-# Commercial use requires a license — contact@halyn.dev
-"""
-LLM Connector — Multi-provider LLM abstraction.
-Supports: Claude API, OpenAI API, Ollama (local), HuggingFace (local),
-          vLLM (self-hosted), any OpenAI-compatible endpoint.
-The LLM is NOT in the control plane. It connects FROM OUTSIDE via MCP.
-This module handles outbound LLM calls when Halyn needs reasoning
-(e.g. for autonomous reasoning, incident analysis, summarization).
-"""
-from __future__ import annotations
-import json
-import logging
-import os
-from abc import ABC, abstractmethod
-from typing import Any
-log = logging.getLogger("halyn.llm")
-class LLMConnector(ABC):
-    """Base class for LLM connections."""
-    @abstractmethod
-    async def complete(self, prompt: str, system: str = "", max_tokens: int = 1000) -> str:
-        """Send prompt, get response."""
-    @abstractmethod
-    async def is_available(self) -> bool:
-        """Check if LLM is reachable."""
-class ClaudeConnector(LLMConnector):
-    """Anthropic Claude API."""
-    def __init__(self, api_key: str = "", model: str = "claude-sonnet-4-6") -> None:
-        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
-        self.model = model
-        self.endpoint = "https://api.anthropic.com/v1/messages"
-    async def complete(self, prompt: str, system: str = "", max_tokens: int = 1000) -> str:
-        import aiohttp
-        headers = {
-            "x-api-key": self.api_key,
-            "anthropic-version": "2023-06-01",
-            "content-type": "application/json",
-        }
-        body: dict[str, Any] = {
-            "model": self.model,
-            "max_tokens": max_tokens,
-            "messages": [{"role": "user", "content": prompt}],
-        }
-        if system:
-            body["system"] = system
-        async with aiohttp.ClientSession() as session:
-            async with session.post(self.endpoint, json=body, headers=headers) as resp:
-                data = await resp.json()
-                return data.get("content", [{}])[0].get("text", "")
-    async def is_available(self) -> bool:
-        return bool(self.api_key)
-class OpenAIConnector(LLMConnector):
-    """OpenAI or any OpenAI-compatible API (vLLM, LiteLLM, etc.)."""
-    def __init__(self, api_key: str = "", model: str = "gpt-4.1",
-                 endpoint: str = "https://api.openai.com/v1") -> None:
-        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
-        self.model = model
-        self.endpoint = endpoint
-    async def complete(self, prompt: str, system: str = "", max_tokens: int = 1000) -> str:
-        import aiohttp
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json",
-        }
-        messages = []
-        if system:
-            messages.append({"role": "system", "content": system})
-        messages.append({"role": "user", "content": prompt})
-        body = {"model": self.model, "max_tokens": max_tokens, "messages": messages}
-        async with aiohttp.ClientSession() as session:
-            async with session.post(f"{self.endpoint}/chat/completions",
-                                    json=body, headers=headers) as resp:
-                data = await resp.json()
-                return data.get("choices", [{}])[0].get("message", {}).get("content", "")
-    async def is_available(self) -> bool:
-        return bool(self.api_key)
-class OllamaConnector(LLMConnector):
-    """Ollama local inference. Zero cost, zero internet."""
-    def __init__(self, model: str = "llama3.2", host: str = "http://localhost:11434") -> None:
-        self.model = model
-        self.host = host
-    async def complete(self, prompt: str, system: str = "", max_tokens: int = 1000) -> str:
-        import aiohttp
-        body: dict[str, Any] = {
-            "model": self.model,
-            "prompt": prompt,
-            "stream": False,
-        }
-        if system:
-            body["system"] = system
-        async with aiohttp.ClientSession() as session:
-            async with session.post(f"{self.host}/api/generate", json=body) as resp:
-                data = await resp.json()
-                return data.get("response", "")
-    async def is_available(self) -> bool:
-        try:
-            import aiohttp
-            async with aiohttp.ClientSession() as session:
-                async with session.get(f"{self.host}/api/tags", timeout=aiohttp.ClientTimeout(total=3)) as resp:
-                    return resp.status == 200
-        except Exception:
-            return False
-class HuggingFaceConnector(LLMConnector):
-    """Run any HuggingFace model locally. Zero cloud."""
-    def __init__(self, model: str = "mistralai/Mistral-7B-Instruct-v0.3") -> None:
-        self.model_name = model
-        self._pipeline: Any = None
-    async def complete(self, prompt: str, system: str = "", max_tokens: int = 1000) -> str:
-        if self._pipeline is None:
-            self._load()
-        full_prompt = f"{system}
-{prompt}" if system else prompt
-        result = self._pipeline(full_prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
-        return result[0]["generated_text"][len(full_prompt):]
-    async def is_available(self) -> bool:
-        try:
-            import transformers  # noqa: F401
-            return True
-        except ImportError:
-            return False
-    def _load(self) -> None:
-        from transformers import pipeline
-        log.info("llm.loading model=%s (this may take a while...)", self.model_name)
-        self._pipeline = pipeline("text-generation", model=self.model_name, device_map="auto")
-        log.info("llm.loaded model=%s", self.model_name)
-# ─── Factory ────────────────────────────────────────
-def create_connector(provider: str, **kwargs: Any) -> LLMConnector:
-    """Create an LLM connector by name."""
-    connectors: dict[str, type[LLMConnector]] = {
-        "claude": ClaudeConnector,
-        "anthropic": ClaudeConnector,
-        "openai": OpenAIConnector,
-        "gpt": OpenAIConnector,
-        "ollama": OllamaConnector,
-        "huggingface": HuggingFaceConnector,
-        "hf": HuggingFaceConnector,
-        "vllm": OpenAIConnector,  # vLLM is OpenAI-compatible
-        "litellm": OpenAIConnector,
-    }
-    cls = connectors.get(provider.lower())
-    if cls is None:
-        raise ValueError(f"Unknown LLM provider: {provider}. Available: {list(connectors.keys())}")
-    return cls(**kwargs)