puli_plg-0.1.26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
+ import argparse
+ import asyncio
+ import json
+ import logging
+ from typing import List
+ from mcp.server.fastmcp import FastMCP, Context
+
+ from puli_mcp_server.mcp_server.models import ChangeSet
+ from puli_mcp_server.proxy_client import ProxyClient
+ from puli_mcp_server.embedding_client.client import EmbeddingClient
+ from puli_mcp_server.embedding_client.config import EmbeddingConfig
+ from puli_mcp_server.llm_agent.llm_agent import LLMAgent
+ from puli_mcp_server.llm_agent.config import LLMAgentConfig
+ from puli_mcp_server.llm_agent.models import LLMQueryRequest
+
+ logger = logging.getLogger(__name__)
+
+ mcp = FastMCP("code-reviewer")
+
+ # Initialize clients with remote configuration
+ proxy_client = ProxyClient()
+
+ # Initialize embedding client with remote config
+ embedding_config = EmbeddingConfig.from_remote(proxy_client.mcp_config["config"])
+ embedding_client = EmbeddingClient(config=embedding_config)
+
+ # Initialize LLM agent with remote config and prompts
+ llm_config = LLMAgentConfig.from_remote(
+     proxy_client.mcp_config["config"],
+     proxy_client.mcp_config["prompts"]
+ )
+ llm_agent = LLMAgent(config=llm_config)
+
+
+ def log_configuration():
+     """Log MCP configuration after logging is properly initialized."""
+     logger.info("=" * 60)
+     logger.info("MCP Configuration pulled from proxy:")
+     logger.info(" Config keys: %s", list(proxy_client.mcp_config.get("config", {}).keys()))
+     logger.info(" Prompt keys: %s", list(proxy_client.mcp_config.get("prompts", {}).keys()))
+     logger.info("=" * 60)
+     logger.info("Embedding Config - Model: %s", embedding_config.model)
+     logger.info("LLM Config - Provider: %s, Model: %s, Temperature: %s",
+                 llm_config.provider, llm_config.model, llm_config.temperature)
+     logger.info("System prompt length: %d characters", len(llm_config.system_prompt))
+     logger.info("=" * 60)
+
+
+ # --- 3. Define the Tool ---
+
+ @mcp.tool()
+ async def puli_herd(change_sets: List[ChangeSet], ctx: Context) -> str:
+     """
+     Puli Guard is a tool that analyzes a list of code changes for security risks, complexity, and
+     infrastructure impact by comparing them with historical incidents.
+     """
+
+     results = []
+
+     for i, change_set in enumerate(change_sets, 1):
+         await ctx.info(f"Analyzing change set {i}: {change_set.goal}")
+
+         # 1. Create embedding vector
+         embedding_text = change_set.to_embedding_string()
+         vector = embedding_client.generate_embedding(embedding_text)
+
+         # 2. Query proxy for similar historical incidents and relevant chaos patterns (in parallel)
+         similar_incidents, similar_chaos_patterns = await asyncio.gather(
+             asyncio.to_thread(proxy_client.search_incidents, query_vector=vector),
+             asyncio.to_thread(proxy_client.search_chaos_patterns, query_vector=vector)
+         )
+
+         # 3. Create LLM query request and get analysis
+         query_request = LLMQueryRequest(
+             change_set=change_set,
+             historical_incidents=similar_incidents,
+             relevant_chaos_patterns=similar_chaos_patterns
+         )
+         risk_assessment = await llm_agent.query(query_request)
+         results.append(risk_assessment.to_str())
+
+     return "\n\n---\n\n".join(results)
+
+
+ class MockContext:
+     """Mock context for CLI mode that prints info messages to stdout."""
+     async def info(self, msg: str):
+         print(f"[INFO] {msg}")
+
+
+ async def run_from_file(file_path: str) -> str:
+     """Run analysis from a JSON file containing change sets."""
+     with open(file_path) as f:
+         data = json.load(f)
+
+     # Accept either a bare list of change sets or the {"change_sets": [...]}
+     # wrapper used by the bundled example file.
+     if isinstance(data, dict):
+         data = data.get("change_sets", [])
+
+     change_sets = [ChangeSet(**cs) for cs in data]
+     return await puli_herd(change_sets, MockContext())
+
+
+ def main():
+     # Configure logging
+     logging.basicConfig(
+         level=logging.INFO,
+         format='[%(asctime)s] %(levelname)s - %(name)s - %(message)s',
+         datefmt='%Y-%m-%d %H:%M:%S'
+     )
+
+     # Log configuration after logging is set up
+     log_configuration()
+
+     parser = argparse.ArgumentParser(description="Puli Code Reviewer")
+     parser.add_argument("--file", "-f", help="JSON file with change sets (CLI mode)")
+     args = parser.parse_args()
+
+     if args.file:
+         result = asyncio.run(run_from_file(args.file))
+         print(result)
+     else:
+         mcp.run()  # Original MCP mode
+
+
+ if __name__ == "__main__":
+     main()
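
For local testing, the CLI path above can be driven without an MCP host. A minimal sketch, assuming the server module is importable as puli_mcp_server.mcp_server.server (that module name is an assumption and is not shown in this diff) and that change_sets.json follows the shape of the bundled example file further down:

import asyncio

# Hypothetical import path for the server module shown above.
from puli_mcp_server.mcp_server.server import run_from_file

# change_sets.json: either a bare list of change-set objects or
# {"change_sets": [...]} as in the bundled example.
report = asyncio.run(run_from_file("change_sets.json"))
print(report)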
@@ -0,0 +1,3 @@
+ from .client import ProxyClient
+
+ __all__ = ["ProxyClient"]
@@ -0,0 +1,264 @@
+ from typing import List, Optional, Dict, Any
+ import urllib.request
+ import urllib.error
+ import json
+ import logging
+
+ from puli_models import IncidentQueryResult, ChaosPatternQueryResult
+
+ from .config import ProxyConfig
+ from .token_manager import get_machine_id, read_token, write_token, clear_token
+
+ logger = logging.getLogger(__name__)
+
+
+ class ProxyClient:
+     """Client for interacting with the Puli proxy service."""
+
+     def __init__(self, config: Optional[ProxyConfig] = None):
+         """
+         Initialize the proxy client.
+
+         Args:
+             config: ProxyConfig instance. If None, loads from environment variables.
+         """
+         self.config = config or ProxyConfig.from_env()
+         self._ensure_authenticated()
+         self.mcp_config = self._fetch_mcp_config()
+
+     def _ensure_authenticated(self) -> None:
+         """Load existing JWT or register to obtain one."""
+         token = read_token()
+         if token:
+             self.config.jwt_token = token
+             return
+         self._register()
+
+     def _register(self) -> None:
+         """Register this machine and store the returned JWT."""
+         machine_id = get_machine_id()
+         url = f"{self.config.base_url}/auth/register"
+         headers = {
+             "X-API-Key": self.config.api_key,
+             "Content-Type": "application/json",
+         }
+         if self.config.identity_token:
+             headers["Authorization"] = f"Bearer {self.config.identity_token}"
+
+         body = json.dumps({"machine_id": machine_id}).encode()
+         req = urllib.request.Request(url, data=body, headers=headers, method="POST")
+
+         try:
+             with urllib.request.urlopen(req) as response:
+                 result = json.loads(response.read().decode())
+         except urllib.error.HTTPError as e:
+             error_body = e.read().decode() if e.fp else ""
+             raise RuntimeError(f"Registration failed: {e.code} - {error_body}") from e
+
+         token = result["token"]
+         write_token(token)
+         self.config.jwt_token = token
+
+     def _fetch_mcp_config(self) -> Dict[str, Any]:
+         """Fetch MCP configuration from the proxy after authentication."""
+         try:
+             return self._request("GET", "/config/mcp")
+         except RuntimeError as e:
+             # If config fetch fails, return empty config (allows local dev fallback)
+             if "404" in str(e) or "500" in str(e):
+                 logger.warning("Failed to fetch MCP config from proxy: %s", e)
+                 logger.warning("Falling back to empty config. Client will use from_env() methods for local development.")
+                 return {"config": {}, "prompts": {}}
+             raise
+
+     def _request(
+         self,
+         method: str,
+         path: str,
+         data: Optional[dict] = None,
+     ) -> dict:
+         """Make an HTTP request to the proxy with auto-retry on 401."""
+         url = f"{self.config.base_url}{path}"
+         headers = {
+             "Content-Type": "application/json",
+         }
+
+         if self.config.jwt_token:
+             headers["X-Auth-Token"] = self.config.jwt_token
+
+         if self.config.identity_token:
+             headers["Authorization"] = f"Bearer {self.config.identity_token}"
+
+         body = json.dumps(data).encode() if data else None
+         request = urllib.request.Request(url, data=body, headers=headers, method=method)
+
+         try:
+             with urllib.request.urlopen(request) as response:
+                 return json.loads(response.read().decode())
+         except urllib.error.HTTPError as e:
+             if e.code == 401:
+                 # Token expired or invalid; re-register once and retry
+                 clear_token()
+                 self._register()
+                 return self._retry_request(method, url, data)
+             error_body = e.read().decode() if e.fp else ""
+             raise RuntimeError(f"Proxy request failed: {e.code} - {error_body}") from e
+
+     def _retry_request(
+         self,
+         method: str,
+         url: str,
+         data: Optional[dict] = None,
+     ) -> dict:
+         """Single retry after re-registration (no infinite loop)."""
+         headers = {
+             "Content-Type": "application/json",
+         }
+
+         if self.config.jwt_token:
+             headers["X-Auth-Token"] = self.config.jwt_token
+
+         if self.config.identity_token:
+             headers["Authorization"] = f"Bearer {self.config.identity_token}"
+
+         body = json.dumps(data).encode() if data else None
+         request = urllib.request.Request(url, data=body, headers=headers, method=method)
+
+         try:
+             with urllib.request.urlopen(request) as response:
+                 return json.loads(response.read().decode())
+         except urllib.error.HTTPError as e:
+             error_body = e.read().decode() if e.fp else ""
+             raise RuntimeError(f"Proxy request failed after retry: {e.code} - {error_body}") from e
+
+     def _search(
+         self,
+         query_vector: List[float],
+         url: str,
+         limit: int = 10,
+         filter_expr: Optional[str] = None,
+         output_fields: Optional[List[str]] = None,
+     ) -> List[Any]:
+         """
+         Run a vector similarity search against the given proxy endpoint.
+
+         Args:
+             query_vector: The embedding vector to search with.
+             url: Search endpoint path on the proxy.
+             limit: Maximum number of results to return.
+             filter_expr: Optional filter expression.
+             output_fields: Fields to include in results.
+
+         Returns:
+             List of raw result dictionaries from the proxy.
+         """
+         data = {
+             "query_vector": query_vector,
+             "limit": limit,
+         }
+         if filter_expr:
+             data["filter"] = filter_expr
+         if output_fields:
+             data["output_fields"] = output_fields
+
+         response = self._request("POST", url, data)
+         return response.get("results", [])
+
+     def search_incidents(
+         self,
+         query_vector: List[float],
+         limit: int = 10,
+         filter_expr: Optional[str] = None,
+         output_fields: Optional[List[str]] = None,
+     ) -> List[IncidentQueryResult]:
+         results = self._search(query_vector, "/incidents/search", limit, filter_expr, output_fields)
+         return [IncidentQueryResult.from_dict(r) for r in results]
+
+     def search_chaos_patterns(
+         self,
+         query_vector: List[float],
+         limit: int = 10,
+         filter_expr: Optional[str] = None,
+         output_fields: Optional[List[str]] = None,
+     ) -> List[ChaosPatternQueryResult]:
+         results = self._search(query_vector, "/chaos-patterns/search", limit, filter_expr, output_fields)
+         return [ChaosPatternQueryResult.from_dict(r) for r in results]
+
+     def query(
+         self,
+         filter_expr: str,
+         output_fields: Optional[List[str]] = None,
+         limit: int = 100,
+     ) -> List[IncidentQueryResult]:
+         """
+         Query incidents by filter expression (no vector search).
+
+         Args:
+             filter_expr: Filter expression.
+             output_fields: Fields to include in results.
+             limit: Maximum number of results.
+
+         Returns:
+             List of matching IncidentQueryResult objects.
+         """
+         data = {
+             "filter": filter_expr,
+             "limit": limit,
+         }
+         if output_fields:
+             data["output_fields"] = output_fields
+
+         response = self._request("POST", "/incidents/query", data)
+         return [IncidentQueryResult.from_dict(r) for r in response.get("results", [])]
+
+     def insert(self, incidents: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """
+         Insert incidents into the collection.
+
+         Args:
+             incidents: List of incident dictionaries to insert.
+
+         Returns:
+             Insert result with IDs of inserted records.
+         """
+         data = {"data": incidents}
+         return self._request("POST", "/incidents/insert", data)
+
+     def get_by_id(self, incident_id: str) -> Optional[Dict[str, Any]]:
+         """
+         Get a single incident by ID.
+
+         Args:
+             incident_id: The incident ID to retrieve.
+
+         Returns:
+             Incident dictionary or None if not found.
+         """
+         try:
+             return self._request("GET", f"/incidents/{incident_id}")
+         except RuntimeError as e:
+             if "404" in str(e):
+                 return None
+             raise
+
+     def delete(self, ids: List[str]) -> Dict[str, Any]:
+         """
+         Delete incidents by IDs.
+
+         Args:
+             ids: List of incident IDs to delete.
+
+         Returns:
+             Delete result.
+         """
+         data = {"ids": ids}
+         return self._request("DELETE", "/incidents", data)
+
+     def count(self) -> int:
+         """
+         Get the total number of incidents in the collection.
+
+         Returns:
+             Number of incidents.
+         """
+         response = self._request("GET", "/incidents/count")
+         return response.get("count", 0)
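
For orientation, a short sketch of driving ProxyClient directly. Everything called here is defined above; the 768-float placeholder vector is an assumption, since the collection's embedding dimension is not stated in this diff:

from puli_mcp_server.proxy_client import ProxyClient

client = ProxyClient()  # loads or obtains a JWT, then pulls the MCP config

# Placeholder vector; in the MCP server this comes from EmbeddingClient.
vector = [0.0] * 768

incidents = client.search_incidents(query_vector=vector, limit=5)
patterns = client.search_chaos_patterns(query_vector=vector, limit=5)

print("stored incidents:", client.count())
for pattern in patterns:
    print(pattern.to_prompt_str())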
@@ -0,0 +1,74 @@
+ import os
+ from dataclasses import dataclass, field
+ from pathlib import Path
+
+ from google.auth.transport.requests import Request
+ from google.oauth2 import service_account
+
+ # Bundled service account key path (inside the package)
+ _CREDENTIALS_PATH = Path(__file__).parent.parent / "credentials" / "service-account.json"
+
+ # Default Cloud Run URL
+ _DEFAULT_PROXY_URL = "https://puli-proxy-gpvaoh5hka-uw.a.run.app"
+
+
+ def _load_credentials(target_audience: str) -> service_account.IDTokenCredentials | None:
+     """Load GCP credentials from the bundled service account key."""
+     sa_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", str(_CREDENTIALS_PATH))
+
+     if not Path(sa_path).exists():
+         return None
+
+     return service_account.IDTokenCredentials.from_service_account_file(
+         sa_path, target_audience=target_audience
+     )
+
+
+ @dataclass
+ class ProxyConfig:
+     """Configuration for the proxy client."""
+
+     base_url: str
+     api_key: str
+     _credentials: service_account.IDTokenCredentials | None = field(
+         default=None, repr=False
+     )
+     _jwt_token: str | None = field(default=None, repr=False)
+
+     @property
+     def identity_token(self) -> str | None:
+         """Get a valid identity token, refreshing automatically if expired."""
+         if self._credentials is None:
+             return None
+
+         if not self._credentials.valid:
+             self._credentials.refresh(Request())
+
+         return self._credentials.token
+
+     @property
+     def jwt_token(self) -> str | None:
+         return self._jwt_token
+
+     @jwt_token.setter
+     def jwt_token(self, value: str | None) -> None:
+         self._jwt_token = value
+
+     @classmethod
+     def from_env(cls) -> "ProxyConfig":
+         """Load configuration from environment variables."""
+         base_url = os.environ.get("PROXY_BASE_URL", _DEFAULT_PROXY_URL)
+         if not base_url:
+             raise ValueError("PROXY_BASE_URL environment variable is required")
+
+         api_key = os.environ.get("PROXY_API_KEY", "puli-proxy-api-key")
+         if not api_key:
+             raise ValueError("PROXY_API_KEY environment variable is required")
+
+         credentials = _load_credentials(target_audience=base_url)
+
+         return cls(
+             base_url=base_url.rstrip("/"),
+             api_key=api_key,
+             _credentials=credentials,
+         )
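
The from_env defaults point at the bundled Cloud Run proxy; for local development the config can also be built explicitly and handed to the client. A small sketch, assuming the module path puli_mcp_server.proxy_client.config (matching the client's relative import) and a proxy running on localhost; the URL and key values are placeholders:

from puli_mcp_server.proxy_client import ProxyClient
from puli_mcp_server.proxy_client.config import ProxyConfig

# Local proxy URL and API key are illustrative placeholders.
config = ProxyConfig(base_url="http://localhost:8080", api_key="dev-key")
client = ProxyClient(config=config)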
@@ -0,0 +1,36 @@
+ import uuid
+ from pathlib import Path
+
+ _PULI_DIR = Path.home() / ".puli"
+ _MACHINE_ID_FILE = _PULI_DIR / "machine_id"
+ _TOKEN_FILE = _PULI_DIR / "token"
+
+
+ def _ensure_dir() -> None:
+     _PULI_DIR.mkdir(mode=0o700, exist_ok=True)
+
+
+ def get_machine_id() -> str:
+     _ensure_dir()
+     if _MACHINE_ID_FILE.exists():
+         return _MACHINE_ID_FILE.read_text().strip()
+     machine_id = str(uuid.uuid4())
+     _MACHINE_ID_FILE.write_text(machine_id)
+     return machine_id
+
+
+ def read_token() -> str | None:
+     if _TOKEN_FILE.exists():
+         token = _TOKEN_FILE.read_text().strip()
+         return token or None
+     return None
+
+
+ def write_token(token: str) -> None:
+     _ensure_dir()
+     _TOKEN_FILE.write_text(token)
+
+
+ def clear_token() -> None:
+     if _TOKEN_FILE.exists():
+         _TOKEN_FILE.unlink()
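
These helpers keep client state under ~/.puli: machine_id identifies the installation and token caches the JWT, so removing the token is what forces re-registration on the next ProxyClient() construction. A short sketch, with the module path assumed from the client's relative import:

from puli_mcp_server.proxy_client.token_manager import (
    clear_token, get_machine_id, read_token,
)

print("machine id:", get_machine_id())  # ~/.puli/machine_id, created on first use
print("cached JWT:", read_token())      # None until a registration has succeeded

clear_token()  # next ProxyClient() call will hit /auth/register again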
@@ -0,0 +1,16 @@
+ {
+   "change_sets": [
+     {
+       "goal": "Add chaos_scenario field to RiskAssessment model to track chaos testing scenarios",
+       "changes": [
+         {
+           "file_path": "src/puli_mcp_server/llm_agent/models.py",
+           "change_type": "modify",
+           "diff_content": "@@ -87,6 +87,11 @@ class RiskAssessment(BaseModel):\n technical_finding: TechnicalFinding\n business_context: BusinessContext\n \n consequence: str = Field(\n ..., \n description=\"Description: What happens to the user or the business. Style Rule: Be strictly factual. No drama. No hyperbole. Example: \\\"User is double-charged. Support ticket generated.\\\" (NOT \\\"Catastrophic failure destroys trust\\\").\"\n )\n+\n+ chaos_scenario: Optional[str] = Field(\n+ None,\n+ description=\"Description of the chaos scenario that was run to test this risk assessment.\"\n+ )\n \n historical_incident: Optional[RealIncident] = Field(\n None, \n description=\"Only populate if a famous/known incident matches this exact failure pattern.\"\n )"
+         }
+       ],
+       "related_infrastructure": "Pydantic Models, LLM Agent",
+       "additional_context": "Added optional field to track chaos engineering scenarios used in risk assessment validation"
+     }
+   ]
+ }
@@ -0,0 +1,9 @@
+ from .incidents import Incident, IncidentQueryResult
+ from .chaos_patterns import ChaosPattern, ChaosPatternQueryResult
+
+ __all__ = [
+     "Incident",
+     "IncidentQueryResult",
+     "ChaosPattern",
+     "ChaosPatternQueryResult",
+ ]
@@ -0,0 +1,89 @@
+ from dataclasses import dataclass
+ from typing import List
+
+
+ @dataclass
+ class ChaosPattern:
+     """Represents a chaos pattern record in the Zilliz collection."""
+
+     id: str
+     vector: List[float]
+     name: str
+     category: str
+     description: str
+     cause: str
+     symptoms: str
+     technology: str | None = None
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "ChaosPattern":
+         """Create a ChaosPattern from a dictionary."""
+         return cls(
+             id=data.get("id"),
+             vector=list(data.get("vector", [])),
+             name=data.get("name"),
+             category=data.get("category"),
+             description=data.get("description"),
+             cause=data.get("cause"),
+             symptoms=data.get("symptoms"),
+             technology=data.get("technology"),
+         )
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for insertion."""
+         return {
+             "id": self.id,
+             "vector": self.vector,
+             "name": self.name,
+             "category": self.category,
+             "description": self.description,
+             "cause": self.cause,
+             "symptoms": self.symptoms,
+             "technology": self.technology,
+         }
+
+
+ @dataclass
+ class ChaosPatternQueryResult:
+     """Represents a lightweight chaos pattern record for search/query results."""
+
+     name: str
+     category: str
+     description: str
+     cause: str
+     symptoms: str
+     technology: str | None = None
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "ChaosPatternQueryResult":
+         """Create a ChaosPatternQueryResult from a dictionary."""
+         return cls(
+             name=data.get("name"),
+             category=data.get("category"),
+             description=data.get("description"),
+             cause=data.get("cause"),
+             symptoms=data.get("symptoms"),
+             technology=data.get("technology"),
+         )
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for serialization."""
+         return {
+             "name": self.name,
+             "category": self.category,
+             "description": self.description,
+             "cause": self.cause,
+             "symptoms": self.symptoms,
+             "technology": self.technology,
+         }
+
+     def to_prompt_str(self) -> str:
+         """Returns a string representation of the chaos pattern query result."""
+         prompt = f"Title: {self.name}\n" \
+             + f"Category: {self.category}\n" \
+             + f"Description: {self.description}\n" \
+             + f"Root Causes: {self.cause}\n" \
+             + f"Common Symptoms: {self.symptoms}\n"
+         if self.technology:
+             prompt += f"Technology: {self.technology}\n"
+         return prompt
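
A small round-trip sketch for the query-result model; the field values below are invented purely for illustration:

from puli_models import ChaosPatternQueryResult

raw = {
    "name": "Dependency timeout",
    "category": "network",
    "description": "Downstream service responds slowly under load.",
    "cause": "Missing client-side timeout and retry budget.",
    "symptoms": "Thread pool exhaustion, rising latency, cascading failures.",
    "technology": "HTTP",
}

pattern = ChaosPatternQueryResult.from_dict(raw)
assert pattern.to_dict() == raw  # to_dict mirrors the input fields
print(pattern.to_prompt_str())   # Title / Category / Description / ... text block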