ops-copilot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ from .graph import InvestigationGraph
2
+ from .secrets import redact_secrets
3
+ from .ssh import SSHClient, SSHError
4
+ from .tools.base import RemoteTool, ToolResult
5
+ from .tools.registry import ToolRegistry
6
+ from .tools.shell import ShellTool
7
+
8
+ __all__ = [
9
+ "InvestigationGraph",
10
+ "RemoteTool",
11
+ "SSHClient",
12
+ "SSHError",
13
+ "ShellTool",
14
+ "ToolRegistry",
15
+ "ToolResult",
16
+ "redact_secrets",
17
+ ]
ops_copilot/graph.py ADDED
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+ from collections.abc import AsyncGenerator, Callable, Sequence
6
+ from typing import Annotated, Any
7
+
8
+ from langchain_core.language_models.chat_models import BaseChatModel
9
+ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
10
+ from langgraph.graph import END, StateGraph
11
+ from langgraph.graph.message import add_messages
12
+ from langgraph.prebuilt import ToolNode
13
+ from lc_content_normalizer import (
14
+ build_human_message_content,
15
+ extract_text_content,
16
+ normalize_tool_output,
17
+ )
18
+ from typing_extensions import TypedDict
19
+
20
+ from ops_copilot.sanitizers import sanitize_agent_output
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class AgentState(TypedDict):
    # State passed between the graph nodes of InvestigationGraph.

    # Conversation transcript; annotated with langgraph's add_messages reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Names of tools that produced a ToolMessage so far, in first-use order.
    tools_used: list[str]
    # Number of agent-node turns taken; incremented by the agent node.
    iteration: int
29
+
30
+
31
class InvestigationGraph:
    """Two-node LangGraph investigation loop: agent -> tools -> agent.

    The agent node calls the chat model; when the model's reply carries tool
    calls the tools node executes them and control returns to the agent.
    The loop ends on the first model reply without tool calls. Tool output,
    final model text, streamed tokens and streamed error text are all passed
    through ``sanitize_output`` before leaving this class.
    """

    def __init__(
        self,
        llm: BaseChatModel,
        tools: list,
        *,
        system_prompt: str,
        vision_format: str = "openai",
        sanitize_output: Callable[[str], str] = sanitize_agent_output,
    ) -> None:
        """Bind the tools to the model and compile the graph.

        Args:
            llm: Chat model that drives the investigation.
            tools: Tools handed to the ToolNode and bound to the model.
            system_prompt: Mandatory system message that opens every run.
            vision_format: Passed to ``build_human_message_content`` when
                building the user message.
            sanitize_output: Callable applied to every text leaving the graph.

        Raises:
            ValueError: If ``system_prompt`` is empty.
        """
        if not system_prompt:
            raise ValueError("system_prompt is required")
        self._system_prompt = system_prompt
        self._vision_format = vision_format
        self._sanitize_output = sanitize_output
        self._tool_node = ToolNode(tools, handle_tool_errors=True)
        try:
            self._llm_with_tools = llm.bind_tools(tools) if tools else llm
        except NotImplementedError:
            # Models that do not implement bind_tools are used without binding.
            self._llm_with_tools = llm
        self._graph = self._build_graph()

    @property
    def vision_format(self) -> str:
        """Return the vision format given at construction time."""
        return self._vision_format

    def _build_graph(self) -> Any:
        """Wire the agent and tools nodes together and compile the graph."""
        graph = StateGraph(AgentState)
        graph.add_node("agent", self._agent_node)
        graph.add_node("tools", self._tool_node_wrapper)
        graph.set_entry_point("agent")
        graph.add_conditional_edges("agent", self._should_continue, {"tools": "tools", "end": END})
        graph.add_edge("tools", "agent")
        return graph.compile()

    async def _agent_node(self, state: AgentState) -> dict[str, Any]:
        """Invoke the model on the transcript and bump the iteration counter."""
        iteration = state.get("iteration", 0) + 1
        response = await self._llm_with_tools.ainvoke(list(state["messages"]))
        logger.debug(
            "agent_iteration iteration=%d tool_calls=%d",
            iteration,
            len(response.tool_calls),
        )
        return {"messages": [response], "iteration": iteration}

    async def _tool_node_wrapper(self, state: AgentState) -> dict[str, Any]:
        """Run the pending tool calls, record tool names and sanitize output."""
        result = await self._tool_node.ainvoke(state)
        tools_used = list(state.get("tools_used", []))
        for message in result.get("messages", []):
            if not isinstance(message, ToolMessage):
                continue
            if message.name and message.name not in tools_used:
                tools_used.append(message.name)
            content = extract_text_content(message.content)
            if content.startswith("[TOOL ERROR]"):
                logger.warning("tool_error name=%s output=%s", message.name, content[:300])
            # Sanitize in place so the model never sees the raw tool output.
            message.content = self._sanitize_output(content)
        return {**result, "tools_used": tools_used}

    def _should_continue(self, state: AgentState) -> str:
        """Route to ``"tools"`` when the last message is an AI tool request."""
        last = state["messages"][-1]
        return "tools" if isinstance(last, AIMessage) and last.tool_calls else "end"

    @staticmethod
    def _event_call_id(event: Any) -> Any:
        """Return the event's ``run_id``, else ``data["id"]``, else ``""``."""
        return event.get("run_id") or event.get("data", {}).get("id") or ""

    async def run(
        self,
        user_message: str,
        *,
        history: list[BaseMessage] | None = None,
        images: list[dict[str, str]] | None = None,
    ) -> dict[str, Any]:
        """Run one investigation to completion.

        Returns:
            A dict with ``messages`` (only the messages produced by this run,
            AI text sanitized), ``tools_used`` and ``duration`` (seconds,
            measured with ``time.monotonic``).
        """
        messages = self._build_messages(user_message, history=history, images=images)
        start = time.monotonic()
        final_state = await self._graph.ainvoke(
            {"messages": messages, "tools_used": [], "iteration": 0}
        )
        duration = time.monotonic() - start
        # Drop the system prompt, history and user message that we supplied.
        new_messages = list(final_state["messages"])[len(messages) :]
        for message in new_messages:
            if isinstance(message, AIMessage) and message.content:
                message.content = self._sanitize_output(extract_text_content(message.content))
        return {
            "messages": new_messages,
            "tools_used": final_state.get("tools_used", []),
            "duration": duration,
        }

    async def stream(
        self,
        user_message: str,
        *,
        history: list[BaseMessage] | None = None,
        images: list[dict[str, str]] | None = None,
    ) -> AsyncGenerator[dict[str, Any], None]:
        """Run one investigation and yield progress events.

        Yields dicts shaped ``{"event": name, "data": payload}`` where name is
        ``token``, ``tool_start``, ``tool_end``, ``error`` or ``done``. After
        an ``error`` event the generator stops without emitting ``done``.
        """
        messages = self._build_messages(user_message, history=history, images=images)
        try:
            async for event in self._graph.astream_events(
                {"messages": messages, "tools_used": [], "iteration": 0},
                version="v2",
            ):
                event_name = event["event"]
                if event_name == "on_chat_model_stream":
                    chunk = event["data"].get("chunk")
                    text = extract_text_content(getattr(chunk, "content", None)) if chunk else ""
                    if text:
                        yield {"event": "token", "data": self._sanitize_output(text)}
                elif event_name == "on_tool_start":
                    tool_name = event.get("name", "unknown")
                    call_id = self._event_call_id(event)
                    # Without an id, fall back to a clock value so the step id
                    # is still unique for this start event.
                    step_id = (
                        f"{tool_name}:{call_id}"
                        if call_id
                        else f"{tool_name}:{time.monotonic_ns()}"
                    )
                    yield {
                        "event": "tool_start",
                        "data": {
                            "tool": tool_name,
                            "input": event.get("data", {}).get("input", {}),
                            "call_id": str(call_id),
                            "step_id": step_id,
                        },
                    }
                elif event_name == "on_tool_end":
                    tool_name = event.get("name", "unknown")
                    call_id = self._event_call_id(event)
                    step_id = f"{tool_name}:{call_id}" if call_id else ""
                    output = self._sanitize_output(
                        normalize_tool_output(event.get("data", {}).get("output", ""))
                    )
                    yield {
                        "event": "tool_end",
                        "data": {
                            "tool": tool_name,
                            "output": output,
                            "call_id": str(call_id),
                            "step_id": step_id,
                        },
                    }
        except Exception as exc:
            logger.exception("stream_investigation_failed")
            # Exception text can embed tool output or credentials, so it gets
            # the same sanitizing as every other text sent to the client.
            yield {"event": "error", "data": {"error": self._sanitize_output(str(exc))}}
            return
        yield {"event": "done", "data": {}}

    def _build_messages(
        self,
        user_message: str,
        *,
        history: list[BaseMessage] | None = None,
        images: list[dict[str, str]] | None = None,
    ) -> list[BaseMessage]:
        """Return system prompt + optional history + the new human message."""
        messages: list[BaseMessage] = [SystemMessage(content=self._system_prompt)]
        if history:
            messages.extend(history)
        messages.append(
            HumanMessage(
                content=build_human_message_content(user_message, images, self._vision_format)
            )
        )
        return messages
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from ops_copilot.secrets import redact_secrets
6
+
7
# "[Image 3]"-style placeholders, matched case-insensitively.
_IMAGE_PLACEHOLDER_RE = re.compile(r"\[Image\s+\d+\]", re.IGNORECASE)
# The 'Cannot read "clipboard"' error text, with its optional
# "Inform the user." suffix.
_CLIPBOARD_ERROR_RE = re.compile(
    r"ERROR:\s*Cannot read \"clipboard\" "
    r"\(this model does not support image input\)\.?(?:\s*Inform the user\.?)?",
    re.IGNORECASE,
)
# Whole <system-reminder>...</system-reminder> sections, possibly multi-line.
_SYSTEM_REMINDER_RE = re.compile(
    r"<system-reminder>.*?</system-reminder>",
    re.IGNORECASE | re.DOTALL,
)


def sanitize_user_message(message: str) -> str:
    """Strip injected noise from *message* and normalize its whitespace.

    System-reminder sections, the clipboard error text and image placeholders
    are each replaced by a space. Runs of blank lines and of spaces/tabs are
    then collapsed, every line is stripped, and empty lines are dropped.
    """
    text = message
    for noise in (_SYSTEM_REMINDER_RE, _CLIPBOARD_ERROR_RE, _IMAGE_PLACEHOLDER_RE):
        text = noise.sub(" ", text)

    whitespace_rules = (
        (r"\n{2,}", "\n"),
        (r"[ \t]{2,}", " "),
        (r" ?\n ?", "\n"),
    )
    for pattern, replacement in whitespace_rules:
        text = re.sub(pattern, replacement, text)

    stripped_lines = (line.strip() for line in text.splitlines())
    return "\n".join(line for line in stripped_lines if line)
28
+
29
+
30
def sanitize_agent_output(text: str) -> str:
    """Redact secrets and drop system-reminder sections from *text*.

    If the clipboard/image-input error is still present after that, the whole
    text is replaced by a fixed hint asking for a vision-capable model.
    """
    redacted = _SYSTEM_REMINDER_RE.sub("", redact_secrets(text))
    if _CLIPBOARD_ERROR_RE.search(redacted) is None:
        return redacted
    return (
        "The configured model rejected image input. Use a vision-capable model "
        "or paste the alert text instead."
    )
39
+
40
+
41
def build_user_display_message(message: str, image_count: int = 0) -> str:
    """Return the sanitized message, prefixed by an attachment banner if any.

    With a positive *image_count* the result starts with
    ``[Attached images: N]``; the sanitized text follows on the next line
    unless it is empty.
    """
    body = sanitize_user_message(message)
    if image_count > 0:
        banner = f"[Attached images: {image_count}]"
        return f"{banner}\n{body}" if body else banner
    return body
ops_copilot/secrets.py ADDED
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Iterable
5
+
6
+ __all__ = ["DEFAULT_SECRET_KEY_PATTERNS", "build_secret_key_regex", "redact_secrets"]
7
+
8
+ DEFAULT_SECRET_KEY_PATTERNS: tuple[str, ...] = (
9
+ "password",
10
+ "secret",
11
+ "token",
12
+ "api_key",
13
+ "apikey",
14
+ "access_key",
15
+ "private_key",
16
+ "credential",
17
+ )
18
+
19
+ _ENV_LINE_RE = re.compile(r"^(\s*[A-Z_][A-Z0-9_]*\s*[=:]\s*)(.+)$", re.MULTILINE)
20
+ _BEARER_RE = re.compile(r"(Bearer\s+)[^\s\"']+", re.IGNORECASE)
21
+ _OPENAI_KEY_RE = re.compile(r"(sk-[A-Za-z0-9]{3})[A-Za-z0-9]{10,}")
22
+ _JWT_RE = re.compile(r"eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}")
23
+ _HEX_RUN_RE = re.compile(r"\b[0-9a-fA-F]{32,}\b")
24
+ _DATA_URL_RE = re.compile(r"data:image/([a-zA-Z0-9.+-]+);base64,([A-Za-z0-9+/=]+)")
25
+
26
+
27
+ def build_secret_key_regex(patterns: Iterable[str] | None = None) -> re.Pattern[str]:
28
+ items = list(patterns) if patterns is not None else list(DEFAULT_SECRET_KEY_PATTERNS)
29
+ items = [item.strip() for item in items if item and item.strip()]
30
+ if not items:
31
+ return re.compile(r"(?!x)x")
32
+ return re.compile("|".join(re.escape(item) for item in items), re.IGNORECASE)
33
+
34
+
35
+ def _redact_image_data_urls(text: str) -> str:
36
+ return _DATA_URL_RE.sub(
37
+ lambda match: (
38
+ f"[REDACTED_IMAGE mime=image/{match.group(1)} bytes={len(match.group(2))}]"
39
+ ),
40
+ text,
41
+ )
42
+
43
+
44
+ _DEFAULT_REGEX = build_secret_key_regex(DEFAULT_SECRET_KEY_PATTERNS)
45
+
46
+
47
+ def redact_secrets(text: str) -> str:
48
+ if not text:
49
+ return text
50
+
51
+ def _mask_env_line(match: re.Match[str]) -> str:
52
+ prefix = match.group(1)
53
+ if _DEFAULT_REGEX.search(prefix):
54
+ return f"{prefix}[REDACTED]"
55
+ return match.group(0)
56
+
57
+ text = _ENV_LINE_RE.sub(_mask_env_line, text)
58
+ text = _BEARER_RE.sub(r"\1[REDACTED]", text)
59
+ text = _OPENAI_KEY_RE.sub(r"\1...[REDACTED]", text)
60
+ text = _JWT_RE.sub("[REDACTED_JWT]", text)
61
+ text = _HEX_RUN_RE.sub("[REDACTED_HEX]", text)
62
+ return _redact_image_data_urls(text)
ops_copilot/server.py ADDED
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from collections.abc import AsyncGenerator, Callable
6
+ from typing import Any
7
+
8
+ from fastapi import Depends, FastAPI, Header, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from sse_starlette.sse import EventSourceResponse
11
+
12
+ from ops_copilot.graph import InvestigationGraph
13
+
14
+
15
class ImageInput(BaseModel):
    # One image attached to an investigation request.

    # presumably a "data:" URL carrying the image payload; verify against callers
    data_url: str
    mime_type: str
    # Optional display name for the image.
    name: str | None = None
19
+
20
+
21
class InvestigateRequest(BaseModel):
    # Request body shared by POST /investigate and POST /investigate/stream.

    # The user's question; must contain at least one character.
    message: str = Field(min_length=1)
    # Attached images; defaults to a fresh empty list per request.
    images: list[ImageInput] = Field(default_factory=list)
24
+
25
+
26
def verify_api_key(x_api_key: str | None = Header(default=None)) -> None:
    """Reject the request unless the ``X-API-Key`` header matches the key.

    The expected key is read from the ``OPS_COPILOT_API_KEY`` environment
    variable on every call. When the variable is unset or empty, no check is
    performed and every request is accepted.

    Raises:
        HTTPException: 401 when a key is configured and the header is missing
            or different.
    """
    # Function-scope import keeps this change local to the block.
    import hmac

    expected = os.getenv("OPS_COPILOT_API_KEY")
    if not expected:
        return
    # compare_digest takes time independent of where the inputs differ, so
    # the key cannot be probed byte by byte. Bytes are used because the str
    # form only accepts ASCII.
    supplied = (x_api_key or "").encode("utf-8")
    if not hmac.compare_digest(supplied, expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid API key")
30
+
31
+
32
def create_app(graph_factory: Callable[[], InvestigationGraph]) -> FastAPI:
    """Build a FastAPI app exposing the investigation graph.

    Registers ``POST /investigate`` (JSON result) and
    ``POST /investigate/stream`` (server-sent events). Both routes depend on
    ``verify_api_key`` and call *graph_factory* once per request.
    """
    app = FastAPI(title="ops-copilot", version="0.1.0")

    @app.post("/investigate", dependencies=[Depends(verify_api_key)])
    async def investigate(request: InvestigateRequest) -> dict[str, Any]:
        graph = graph_factory()
        attachments = [image.model_dump() for image in request.images]
        outcome = await graph.run(request.message, images=attachments)
        # Messages are flattened to their content; objects without a
        # ``content`` attribute fall back to their string form.
        contents = [getattr(message, "content", str(message)) for message in outcome["messages"]]
        return {
            "messages": contents,
            "tools_used": outcome["tools_used"],
            "duration": outcome["duration"],
        }

    @app.post("/investigate/stream", dependencies=[Depends(verify_api_key)])
    async def investigate_stream(request: InvestigateRequest) -> EventSourceResponse:
        graph = graph_factory()
        attachments = [image.model_dump() for image in request.images]

        async def event_generator() -> AsyncGenerator[dict[str, str], None]:
            async for item in graph.stream(request.message, images=attachments):
                payload = item["data"]
                if not isinstance(payload, str):
                    # Non-string payloads are serialized to JSON text.
                    payload = json.dumps(payload)
                yield {"event": item["event"], "data": payload}

        return EventSourceResponse(event_generator())

    return app
ops_copilot/ssh.py ADDED
@@ -0,0 +1,90 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+
6
+ import asyncssh
7
+
8
+
9
+ class SSHError(Exception):
10
+ def __init__(self, message: str, exit_code: int | None = None, stderr: str = "") -> None:
11
+ super().__init__(message)
12
+ self.exit_code = exit_code
13
+ self.stderr = stderr
14
+
15
+
16
class SSHClient:
    """Lazily connected asyncssh client that runs commands on one host.

    The connection is opened on first use and reused afterwards. A command
    that fails with an ``asyncssh.Error`` is retried once on a fresh
    connection, so a command may be sent twice.
    """

    def __init__(
        self,
        host: str,
        user: str,
        *,
        port: int = 22,
        key_path: str | None = None,
        password: str | None = None,
        known_hosts: str | None = None,
        connect_timeout: int = 10,
        command_timeout: int = 30,
    ) -> None:
        """Store the connection settings; no network activity happens here.

        Args:
            host: Remote host name or address.
            user: Login user name.
            port: SSH port.
            key_path: Private key file; ``~`` is expanded at connect time.
            password: Password used when no usable key file is found.
            known_hosts: Known-hosts file; ``~`` is expanded at connect time.
            connect_timeout: Seconds allowed for establishing the connection.
            command_timeout: Default seconds allowed per command.
        """
        self.host = host
        self.user = user
        self.port = port
        self.key_path = key_path
        self.password = password
        self.known_hosts = known_hosts
        self.connect_timeout = connect_timeout
        self.command_timeout = command_timeout
        self._conn: asyncssh.SSHClientConnection | None = None
        self._lock = asyncio.Lock()

    async def _get_connection(self) -> asyncssh.SSHClientConnection:
        """Return the cached connection, opening it first if necessary.

        Raises:
            SSHError: If neither an existing key file nor a password is set.
        """
        async with self._lock:
            if self._conn is not None:
                return self._conn

            # Expand "~" so paths such as "~/.ssh/id_ed25519" pass the
            # existence check instead of being treated as missing.
            key_path = os.path.expanduser(self.key_path) if self.key_path else None
            connect_kwargs: dict = {}
            if key_path and os.path.exists(key_path):
                connect_kwargs["client_keys"] = [key_path]
            elif self.password:
                connect_kwargs = {"password": self.password, "client_keys": []}
            else:
                raise SSHError("No SSH credentials configured: provide key_path or password")

            known_hosts_path = os.path.expanduser(self.known_hosts) if self.known_hosts else None
            # NOTE(review): when no known-hosts file is found, None is passed
            # on; per the asyncssh docs an explicit None disables server host
            # key validation. Confirm this is the intended fallback.
            known_hosts = (
                known_hosts_path
                if known_hosts_path and os.path.exists(known_hosts_path)
                else None
            )
            self._conn = await asyncssh.connect(
                host=self.host,
                port=self.port,
                username=self.user,
                known_hosts=known_hosts,
                connect_timeout=self.connect_timeout,
                **connect_kwargs,
            )
            return self._conn

    async def run(self, command: str, timeout: int | None = None) -> str:
        """Run *command* remotely and return its standard output.

        Args:
            command: Shell command line to execute.
            timeout: Seconds to wait; a falsy value selects
                ``command_timeout``.

        Raises:
            SSHError: On non-zero exit status (with exit code and stderr),
                on timeout (exit code ``-1``), or when the connection fails
                on both attempts.
        """
        effective_timeout = timeout or self.command_timeout
        for attempt in range(2):
            conn = await self._get_connection()
            try:
                result = await asyncio.wait_for(
                    conn.run(command, check=False), timeout=effective_timeout
                )
                if result.exit_status != 0:
                    raise SSHError("Command failed", result.exit_status, str(result.stderr or ""))
                return str(result.stdout or "")
            except TimeoutError as exc:
                raise SSHError(f"Command timeout after {effective_timeout}s", -1) from exc
            except asyncssh.Error as exc:
                async with self._lock:
                    # Only forget the connection that actually failed; another
                    # coroutine may already have replaced it with a fresh one.
                    if self._conn is conn:
                        self._conn = None
                if attempt == 1:
                    raise SSHError(f"SSH connection failed: {exc}") from exc
        raise SSHError("SSH connection failed after retry")

    async def close(self) -> None:
        """Close the cached connection, if any, and forget it."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
@@ -0,0 +1,6 @@
1
+ from .base import RemoteTool, ToolResult
2
+ from .executor import LangChainToolWrapper
3
+ from .registry import ToolRegistry
4
+ from .shell import ShellTool
5
+
6
+ __all__ = ["LangChainToolWrapper", "RemoteTool", "ShellTool", "ToolRegistry", "ToolResult"]
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel
7
+
8
+ from ops_copilot.secrets import redact_secrets
9
+ from ops_copilot.ssh import SSHClient, SSHError
10
+
11
+
12
class ToolResult(BaseModel):
    # Outcome of one RemoteTool.execute() call.

    # False when the tool failed; ``error`` then carries the reason.
    success: bool = True
    # Text produced by the tool on success.
    output: str = ""
    # Failure description, or None when the call succeeded.
    error: str | None = None
16
+
17
+
18
class RemoteTool(ABC):
    """Base class for tools that act on a remote host through an SSH client.

    Subclasses provide ``name``, ``description`` and ``execute``.
    """

    name: str
    description: str

    def __init__(self, ssh_client: SSHClient, *, meta: dict[str, Any] | None = None) -> None:
        """Keep the SSH client and the tool metadata (``{}`` when falsy)."""
        self.ssh = ssh_client
        self.meta = meta if meta else {}

    @abstractmethod
    async def execute(self, **kwargs: Any) -> ToolResult:
        """Run the tool with the given arguments and return its result."""

    async def _run_cmd(self, command: str, timeout: int | None = None) -> str:
        """Run *command* over SSH and return its redacted output.

        An ``SSHError`` is not raised to the caller: it is returned as a
        redacted string starting with ``[ERROR]`` that includes the stderr.
        """
        try:
            raw_output = await self.ssh.run(command, timeout=timeout)
        except SSHError as exc:
            return redact_secrets(f"[ERROR] {exc} | stderr: {exc.stderr}")
        return redact_secrets(raw_output)
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from langchain_core.tools import BaseTool
7
+ from pydantic import BaseModel, Field, PrivateAttr, create_model
8
+
9
+ from ops_copilot.tools.base import RemoteTool
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
# YAML parameter type names mapped to Python types; unknown names fall back
# to ``str`` in _build_schema.
_TYPE_MAP: dict[str, type] = {
    "string": str,
    "integer": int,
    "number": float,
    "boolean": bool,
}


def _build_schema(tool: RemoteTool, tool_meta: dict[str, Any]) -> type[BaseModel]:
    """Create the pydantic argument model for *tool* from its YAML metadata.

    Each entry under ``parameters`` becomes one field:

    * no default and ``required: false`` -> optional field defaulting to None
    * no default otherwise -> required field
    * explicit default -> field with that default

    A ``parameters`` key or a parameter body that is empty in YAML (and thus
    ``None``) is treated as an empty mapping. A tool without parameters gets
    a single optional ``no_input`` field.

    Returns:
        A model class named ``"<tool.name>_Input"``.
    """
    fields: dict[str, Any] = {}
    # "parameters:" with no value loads as None, so guard with "or {}".
    for name, definition in (tool_meta.get("parameters") or {}).items():
        definition = definition or {}
        ptype = _TYPE_MAP.get(definition.get("type", "string"), str)
        default = definition.get("default", ...)
        description = definition.get("description")
        if default is ... and not definition.get("required", True):
            fields[name] = (ptype | None, Field(default=None, description=description))
        elif default is ...:
            fields[name] = (ptype, Field(description=description))
        else:
            fields[name] = (ptype, Field(default=default, description=description))
    if not fields:
        fields["no_input"] = (str | None, Field(default=None))
    return create_model(f"{tool.name}_Input", **fields)
36
+
37
+
38
class LangChainToolWrapper(BaseTool):
    # Adapter exposing a RemoteTool through the LangChain BaseTool interface.
    # Only the async path is implemented; failures are returned as text
    # starting with "[TOOL ERROR]" instead of being raised.
    name: str = ""
    description: str = ""
    args_schema: Any = None
    _remote_tool: RemoteTool = PrivateAttr()

    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, remote_tool: RemoteTool, tool_meta: dict[str, Any]):
        """Copy name and description from *remote_tool* and build its schema."""
        schema = _build_schema(remote_tool, tool_meta)
        super().__init__(
            name=remote_tool.name,
            description=remote_tool.description,
            args_schema=schema,
        )
        self._remote_tool = remote_tool

    def _run(self, **kwargs: Any) -> str:
        """Synchronous execution is not supported."""
        raise NotImplementedError

    async def _arun(self, **kwargs: Any) -> str:
        """Execute the wrapped tool and return its output or an error text."""
        # "no_input" is the placeholder field of parameterless schemas.
        kwargs.pop("no_input", None)
        try:
            outcome = await self._remote_tool.execute(**kwargs)
        except Exception as exc:
            logger.error("tool_execute_exception name=%s", self.name, exc_info=True)
            return f"[TOOL ERROR] {type(exc).__name__}: {exc}"
        if outcome.success:
            return outcome.output
        logger.warning("tool_failed name=%s error=%s", self.name, outcome.error)
        return f"[TOOL ERROR] {outcome.error}"
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import yaml
8
+
9
+ from ops_copilot.ssh import SSHClient
10
+ from ops_copilot.tools.base import RemoteTool
11
+ from ops_copilot.tools.executor import LangChainToolWrapper
12
+ from ops_copilot.tools.shell import ShellTool
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ToolRegistry:
    """Loads tool definitions from a YAML file and wraps them for LangChain."""

    def __init__(
        self,
        ssh_client: SSHClient,
        tool_classes: dict[str, type[RemoteTool]] | None = None,
        config_path: str | Path = "tools.yaml",
    ) -> None:
        """Remember the SSH client, custom tool classes and config location.

        Args:
            ssh_client: Client handed to every tool instance.
            tool_classes: Custom ``RemoteTool`` classes keyed by tool name.
            config_path: YAML file read by ``load()``.
        """
        self._ssh = ssh_client
        self._tool_classes = tool_classes or {}
        self._config_path = Path(config_path)
        self._tools: dict[str, LangChainToolWrapper] = {}
        self._meta: dict[str, dict[str, Any]] = {}

    def load(self) -> list[LangChainToolWrapper]:
        """Read the YAML file and instantiate every resolvable tool.

        Definitions whose class cannot be resolved are logged and skipped.

        Returns:
            The wrappers created by this call, in definition order.
        """
        with self._config_path.open(encoding="utf-8") as handle:
            config = yaml.safe_load(handle) or {}

        wrappers: list[LangChainToolWrapper] = []
        for spec in self._iter_tool_definitions(config):
            tool_name = spec["name"]
            tool_cls = self._resolve_tool_class(spec)
            if tool_cls is None:
                logger.warning("tool_unknown name=%s", tool_name)
                continue
            tool = tool_cls(ssh_client=self._ssh, meta=spec)
            # The YAML definition overrides the class-level name/description.
            tool.name = tool_name
            tool.description = spec.get("description", tool.description)
            wrapped = LangChainToolWrapper(remote_tool=tool, tool_meta=spec)
            self._tools[tool_name] = wrapped
            self._meta[tool_name] = spec
            wrappers.append(wrapped)
        return wrappers

    def get_tool(self, name: str) -> LangChainToolWrapper | None:
        """Return the loaded wrapper called *name*, or None."""
        return self._tools.get(name)

    def get_all_meta(self) -> dict[str, dict[str, Any]]:
        """Return a shallow copy of the name -> definition mapping."""
        return dict(self._meta)

    def list_names(self) -> list[str]:
        """Return the names of all loaded tools."""
        return list(self._tools)

    def _resolve_tool_class(self, definition: dict[str, Any]) -> type[RemoteTool] | None:
        """Pick ShellTool for shell definitions, else a registered class."""
        is_shell = definition.get("type") == "shell" or "command" in definition
        if is_shell:
            return ShellTool
        return self._tool_classes.get(definition["name"])

    @staticmethod
    def _iter_tool_definitions(config: dict[str, Any]) -> list[dict[str, Any]]:
        """Collect tool definitions from a flat or categorized config.

        A top-level ``tools`` key wins; otherwise the ``tools`` lists of all
        entries under ``categories`` are concatenated.
        """
        if "tools" in config:
            return list(config.get("tools") or [])
        categories = (config.get("categories") or {}).values()
        return [spec for category in categories for spec in (category.get("tools") or [])]
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import string
4
+ from typing import Any
5
+
6
+ from ops_copilot.tools.base import RemoteTool, ToolResult
7
+
8
+
9
class ShellTool(RemoteTool):
    """Tool that runs one configured command template over SSH.

    The template comes from the ``command`` entry of the tool metadata and is
    filled in with ``str.format`` using the call's keyword arguments.
    """

    name = "shell"
    description = "Run a configured shell command over SSH."

    def __init__(self, ssh_client, *, meta: dict[str, Any] | None = None) -> None:
        """Read name, description, command and timeout from *meta*.

        Raises:
            ValueError: If the metadata carries no command.
        """
        super().__init__(ssh_client, meta=meta)
        settings = self.meta
        self.name = settings.get("name", self.name)
        self.description = settings.get("description", self.description)
        self.command = settings.get("command", "")
        self.timeout = settings.get("timeout")
        if not self.command:
            raise ValueError("ShellTool requires a command in tool metadata")

    async def execute(self, **kwargs: Any) -> ToolResult:
        """Render the command with *kwargs*, run it and wrap the outcome.

        A missing template parameter, or output starting with ``[ERROR]``,
        yields an unsuccessful result.
        """
        try:
            rendered = self._render_command(kwargs)
            output = await self._run_cmd(rendered, timeout=self.timeout)
        except KeyError as exc:
            return ToolResult(success=False, error=f"Missing command parameter: {exc.args[0]}")
        if output.startswith("[ERROR]"):
            return ToolResult(success=False, error=output)
        return ToolResult(output=output)

    def _render_command(self, values: dict[str, Any]) -> str:
        """Fill the command template with *values*.

        Raises:
            KeyError: Naming the first placeholder that is absent or None.
        """
        # NOTE(review): values are interpolated verbatim, without shell
        # quoting. If they come from a model, shell metacharacters in them
        # reach the remote shell. Confirm templates and inputs are constrained.
        placeholders = [
            field for _, field, _, _ in string.Formatter().parse(self.command) if field
        ]
        for field in placeholders:
            if values.get(field) is None:
                raise KeyError(field)
        return self.command.format(**values)
@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: ops-copilot
3
+ Version: 0.1.0
4
+ Summary: Self-hosted SRE investigation copilot with YAML tools, SSH execution, SSE streaming, and secret redaction.
5
+ Project-URL: Homepage, https://github.com/benjaminjornet/ops-copilot
6
+ Project-URL: Issues, https://github.com/benjaminjornet/ops-copilot/issues
7
+ Author-email: Benjamin Jornet <benjamin.jornet@gmail.com>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 Benjamin Jornet
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Keywords: incident-response,langgraph,llm,ops,sre,ssh
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Programming Language :: Python :: 3.13
38
+ Requires-Python: >=3.11
39
+ Requires-Dist: asyncssh>=2.14
40
+ Requires-Dist: langchain-content-normalizer>=0.1.0
41
+ Requires-Dist: langchain-core>=0.3
42
+ Requires-Dist: langgraph>=0.2
43
+ Requires-Dist: pydantic>=2
44
+ Requires-Dist: pyyaml>=6
45
+ Provides-Extra: ollama
46
+ Requires-Dist: langchain-community>=0.3; extra == 'ollama'
47
+ Provides-Extra: openai
48
+ Requires-Dist: langchain-openai>=0.2; extra == 'openai'
49
+ Provides-Extra: server
50
+ Requires-Dist: fastapi>=0.115; extra == 'server'
51
+ Requires-Dist: sse-starlette>=2; extra == 'server'
52
+ Requires-Dist: uvicorn>=0.32; extra == 'server'
53
+ Description-Content-Type: text/markdown
54
+
55
+ # ops-copilot
56
+
57
+ [![CI](https://github.com/BenjaminJornet/ops-copilot/actions/workflows/ci.yml/badge.svg)](https://github.com/BenjaminJornet/ops-copilot/actions/workflows/ci.yml)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
59
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue.svg)](pyproject.toml)
60
+
61
+ Self-hosted SRE investigation copilot for production systems.
62
+
63
+ `ops-copilot` lets an LLM call tools defined in YAML, execute safe remote commands over SSH, redact secrets from outputs, and stream investigation events through LangGraph or an optional FastAPI SSE server.
64
+
65
+ ## Architecture
66
+
67
+ ```text
68
+ User question -> InvestigationGraph -> LLM -> YAML tools -> SSH host
69
+ <- redacted tool output <- command result
70
+ ```
71
+
72
+ The package is intentionally generic. You can start with shell tools from YAML, then inject custom Python `RemoteTool` classes for richer workflows.
73
+
74
+ ## Install
75
+
76
+ ```bash
77
+ uv add ops-copilot
78
+ ```
79
+
80
+ Optional extras:
81
+
82
+ ```bash
83
+ uv add 'ops-copilot[server]'
84
+ uv add 'ops-copilot[openai]'
85
+ uv add 'ops-copilot[ollama]'
86
+ ```
87
+
88
+ ## YAML tools
89
+
90
+ ```yaml
91
+ tools:
92
+ - name: disk_usage
93
+ type: shell
94
+ description: Show filesystem usage.
95
+ command: df -h
96
+
97
+ - name: journalctl_service
98
+ type: shell
99
+ description: Show recent logs for a systemd service.
100
+ command: journalctl -u {service} --since '{since}' --no-pager
101
+ parameters:
102
+ service:
103
+ type: string
104
+ since:
105
+ type: string
106
+ required: false
107
+ default: "30 minutes ago"
108
+ ```
109
+
110
+ ## Minimal usage
111
+
112
+ ```python
113
+ from ops_copilot import InvestigationGraph, SSHClient, ToolRegistry
114
+
115
+ ssh = SSHClient(host="server.example.com", user="deploy", key_path="~/.ssh/id_ed25519")
116
+ tools = ToolRegistry(ssh, config_path="tools.yaml").load()
117
+
118
+ graph = InvestigationGraph(
119
+ llm=your_langchain_chat_model,
120
+ tools=tools,
121
+ system_prompt="You are an SRE copilot. Investigate safely and report evidence.",
122
+ )
123
+
124
+ async for event in graph.stream("The API is slow. What should I check?"):
125
+ print(event)
126
+ ```
127
+
128
+ ## Streaming events
129
+
130
+ `InvestigationGraph.stream()` yields dictionaries with these event names:
131
+
132
+ | Event | Meaning |
133
+ | --- | --- |
134
+ | `token` | streamed model text |
135
+ | `tool_start` | tool call started with input and step id |
136
+ | `tool_end` | tool call finished with redacted output |
137
+ | `error` | graph or stream error |
138
+ | `done` | investigation complete |
139
+
140
+ ## Optional FastAPI server
141
+
142
+ The `ops_copilot.server.create_app()` helper exposes:
143
+
144
+ - `POST /investigate`
145
+ - `POST /investigate/stream`
146
+
147
+ If `OPS_COPILOT_API_KEY` is set, clients must send `X-API-Key`.
148
+
149
+ ## Security notes
150
+
151
+ This project executes commands on servers you control. Treat `tools.yaml` as privileged code.
152
+
153
+ Recommendations:
154
+
155
+ - Use SSH key auth with least-privilege users.
156
+ - Review every command template before exposing it to an LLM.
157
+ - Avoid destructive commands in YAML.
158
+ - Keep parameterized commands narrow.
159
+ - Store no secrets in YAML or prompts.
160
+ - Rely on built-in redaction as a safety net, not as your only control.
161
+
162
+ Built-in redaction covers env-style secret lines, Bearer tokens, OpenAI-style keys, JWTs, long hex runs, and inline image data URLs.
163
+
164
+ ## Documentation and examples
165
+
166
+ - `docs/security-model.md` documents threat boundaries and deployment controls.
167
+ - `docs/writing-tools.md` explains YAML and custom Python tools.
168
+ - `docs/server.md` covers the optional FastAPI/SSE integration.
169
+ - `docs/maintenance-workflows.md` describes maintainer workflows and review checklists.
170
+ - `examples/local_demo.py` runs without a real SSH host using fake outputs.
171
+ - `examples/custom_tool.py` shows how to inject a custom `RemoteTool` class.
172
+
173
+ ## Roadmap
174
+
175
+ - Command allowlist validation for shell tools.
176
+ - Built-in Docker and systemd tool packs.
177
+ - Persistent investigation sessions.
178
+ - Audit log export.
179
+ - More fake incident fixtures for regression tests.
180
+
181
+ ## Development
182
+
183
+ ```bash
184
+ uv sync --dev
185
+ uv run ruff check .
186
+ uv run pytest
187
+ uv run python scripts/smoke.py
188
+ ```
189
+
190
+ ## License
191
+
192
+ MIT
@@ -0,0 +1,15 @@
1
+ ops_copilot/__init__.py,sha256=Qr4TVrJt8pmbpPVfNv2J6qoYOk-Dfvk_JIPVWf3b6Fo,403
2
+ ops_copilot/graph.py,sha256=ruDxwePfYOZpdS95GPf1TEqRuhhfWIG_RzJ-RLn0c3s,7661
3
+ ops_copilot/sanitizers.py,sha256=6qNH6QwpAouIMLbZi2RiHwy-LbtDgMrglE8CmQcm7Io,1529
4
+ ops_copilot/secrets.py,sha256=1cPbh9FOq0HOE1wfUpjgGmYXPm70VHZq59LSrNnRRCQ,2033
5
+ ops_copilot/server.py,sha256=N7X7LQQ2WDSRz5hfuDh0a1xfu4cdVVH1eyWiYXbufmQ,2190
6
+ ops_copilot/ssh.py,sha256=hNtNi8ejk2STxIwNHCRz83fVkKpZ_DPkSglcR8XD-S8,3177
7
+ ops_copilot/tools/__init__.py,sha256=QmJZxov-R_jNqFpcDLkTIjC1OwpM24b5GK8H02MovOw,241
8
+ ops_copilot/tools/base.py,sha256=pqtcdf8VNJJ68eOx8nqyzOl36WkLNCN4-aYSY29VxOw,916
9
+ ops_copilot/tools/executor.py,sha256=212NtgpTIryvtxlL5zkYQP9IhekdRxn9D_P6N4aWwZ0,2353
10
+ ops_copilot/tools/registry.py,sha256=Fp48mFi7X7hLfsUM9ipoqb_xOqYmhcotqKW-CKJJM_s,2598
11
+ ops_copilot/tools/shell.py,sha256=hAdjIY8_mT4QUFUMQQbFwoYUWbFwKz9HbmfGzL6nWx8,1580
12
+ ops_copilot-0.1.0.dist-info/METADATA,sha256=t-4pD3fjbI8zn2uwckvZGhxq5u_7WvhT9N57bn7mVPQ,6614
13
+ ops_copilot-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
14
+ ops_copilot-0.1.0.dist-info/licenses/LICENSE,sha256=WdZfrCBxqj0eY04I5UNmeuqGPeyWa4YGULcHq6cWXS8,1072
15
+ ops_copilot-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Benjamin Jornet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.