github2gerrit 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,298 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: 2025 The Linux Foundation
3
+ """
4
+ Gerrit REST helper with retry, timeout, and transient error detection.
5
+
6
+ This module provides a thin, typed wrapper for issuing Gerrit REST calls
7
+ with:
8
+ - Bounded retries using exponential backoff with jitter
9
+ - Request timeouts
10
+ - Transient error classification (HTTP 5xx/429 and common network errors)
11
+ - Centralized URL handling via GerritUrlBuilder
12
+
13
+ It prefers pygerrit2 when available and falls back to urllib otherwise.
14
+
15
+ Usage:
16
+ from github2gerrit.gerrit_rest import build_client_for_host
17
+
18
+ client = build_client_for_host("gerrit.example.org", timeout=8.0)
19
+ items = client.get("/changes/?q=project:foo limit:1&n=1&o=CURRENT_REVISION")
20
+
21
+ Design notes:
22
+ - The surface area is intentionally small and focused on JSON calls.
23
+ - Authentication (HTTP basic auth) is supported when username/password are
24
+ provided.
25
+ - Base URLs should be created via the URL builder to respect base paths.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import base64
31
+ import json
32
+ import logging
33
+ import os
34
+ import urllib.error
35
+ import urllib.parse
36
+ import urllib.request
37
+ from dataclasses import dataclass
38
+ from typing import Any
39
+ from typing import Final
40
+ from urllib.parse import urljoin
41
+
42
+ from .external_api import ApiType
43
+ from .external_api import RetryPolicy
44
+ from .external_api import external_api_call
45
+ from .gerrit_urls import create_gerrit_url_builder
46
+ from .utils import log_exception_conditionally
47
+
48
+
49
+ log = logging.getLogger("github2gerrit.gerrit_rest")
50
+
51
+ # Optional pygerrit2 import
52
+ try: # pragma: no cover - exercised indirectly by tests that monkeypatch
53
+ from pygerrit2 import GerritRestAPI as _PygerritRestApi # type: ignore[import-not-found, unused-ignore]
54
+ from pygerrit2 import HTTPBasicAuth as _PygerritHttpAuth # type: ignore[import-not-found, unused-ignore]
55
+ except Exception: # pragma: no cover - absence path
56
+ _PygerritRestApi = None
57
+ _PygerritHttpAuth = None
58
+
59
+
60
+ _MSG_PYGERRIT2_REQUIRED_AUTH: Final[str] = "pygerrit2 is required for HTTP authentication"
61
+
62
+ _TRANSIENT_ERR_SUBSTRINGS: Final[tuple[str, ...]] = (
63
+ "timed out",
64
+ "temporarily unavailable",
65
+ "temporary failure",
66
+ "connection reset",
67
+ "connection aborted",
68
+ "broken pipe",
69
+ "connection refused",
70
+ "bad gateway",
71
+ "service unavailable",
72
+ "gateway timeout",
73
+ )
74
+
75
+
76
+ # Removed individual retry logic functions - now using centralized framework
77
+
78
+
79
class GerritRestError(RuntimeError):
    """Signal a Gerrit REST failure that is non-retryable or outlived all retries."""
81
+
82
+
83
+ @dataclass(frozen=True)
84
+ class _Auth:
85
+ user: str
86
+ password: str
87
+
88
+
89
+ def _mask_secret(s: str) -> str:
90
+ if not s:
91
+ return s
92
+ if len(s) <= 4:
93
+ return "****"
94
+ return s[:2] + "*" * (len(s) - 4) + s[-2:]
95
+
96
+
97
class GerritRestClient:
    """
    Simple JSON REST client for Gerrit with retry/timeout handling.

    - If pygerrit2 is available, it is used for plain GET requests.
    - Everything else (POST/PUT, or GET when pygerrit2 is absent) goes
      through urllib with manual request construction.
    """

    # Characters left untouched when percent-encoding a full URL for urllib:
    # RFC 3986 delimiters plus '%' so existing escapes are not double-encoded.
    _URL_SAFE_CHARS = "%/:=&?~#+!$,;'@()*[]"

    def __init__(
        self,
        *,
        base_url: str,
        auth: tuple[str, str] | None = None,
        timeout: float = 8.0,
        max_attempts: int = 5,
    ) -> None:
        """
        Create a client bound to one Gerrit base URL.

        Args:
            base_url: Gerrit API base URL; normalized to end with '/'.
            auth: Optional (user, password) pair; ignored unless both
                parts are non-empty.
            timeout: Per-request timeout in seconds.
            max_attempts: Maximum attempts handed to the retry policy.

        Raises:
            GerritRestError: If auth is requested and pygerrit2 is present
                but its HTTP auth helper is not.
        """
        # Normalize base URL to end with exactly one '/'
        base_url = base_url.rstrip("/") + "/"
        self._base_url: str = base_url
        self._timeout: float = float(timeout)
        self._attempts: int = int(max_attempts)
        self._retry_policy = RetryPolicy(
            max_attempts=max_attempts,
            timeout=timeout,
        )
        self._auth: _Auth | None = None
        if auth and auth[0] and auth[1]:
            self._auth = _Auth(auth[0], auth[1])

        # Build pygerrit client if library is present; otherwise None
        if _PygerritRestApi is not None:
            if self._auth is not None:
                if _PygerritHttpAuth is None:
                    raise GerritRestError(_MSG_PYGERRIT2_REQUIRED_AUTH)
                self._client: Any = _PygerritRestApi(
                    url=self._base_url, auth=_PygerritHttpAuth(self._auth.user, self._auth.password)
                )
            else:
                self._client = _PygerritRestApi(url=self._base_url)
        else:
            self._client = None

        # Only the user name is logged; the password never is.
        log.debug(
            "GerritRestClient(base_url=%s, timeout=%.1fs, attempts=%d, auth_user=%s)",
            self._base_url,
            self._timeout,
            self._attempts,
            self._auth.user if self._auth else "",
        )

    # Public API

    def get(self, path: str) -> Any:
        """HTTP GET, returning parsed JSON."""
        return self._request_json_with_retry("GET", path)

    def post(self, path: str, data: Any | None = None) -> Any:
        """HTTP POST with JSON payload, returning parsed JSON."""
        return self._request_json_with_retry("POST", path, data=data)

    def put(self, path: str, data: Any | None = None) -> Any:
        """HTTP PUT with JSON payload, returning parsed JSON."""
        return self._request_json_with_retry("PUT", path, data=data)

    # Internal helpers

    @staticmethod
    def _quote_url(url: str) -> str:
        """
        Percent-encode characters that are illegal in a request URL.

        Query strings such as "q=project:foo limit:1" contain spaces, which
        http.client rejects. URL delimiters and existing '%' escapes are
        preserved, so an already-encoded URL passes through unchanged.
        """
        return urllib.parse.quote(url, safe=GerritRestClient._URL_SAFE_CHARS)

    def _request_json_with_retry(self, method: str, path: str, data: Any | None = None) -> Any:
        """Perform a JSON request with retry using external API framework."""

        @external_api_call(ApiType.GERRIT_REST, method.lower(), policy=self._retry_policy)
        def _do_request() -> Any:
            return self._request_json(method, path, data)

        return _do_request()

    def _request_json(self, method: str, path: str, data: Any | None = None) -> Any:
        """
        Perform a single JSON request (retry logic handled by decorator).

        Args:
            method: HTTP method name, e.g. "GET".
            path: Endpoint path, with or without a leading '/'.
            data: Optional JSON-serializable request body.

        Returns:
            The parsed JSON response.

        Raises:
            ValueError: If ``path`` is empty.
            GerritRestError: For HTTP errors and any other request failure.
        """
        if not path:
            msg_required = "path is required"
            raise ValueError(msg_required)

        # Strip one leading '/' so urljoin keeps the base URL's path prefix
        rel = path[1:] if path.startswith("/") else path
        url = urljoin(self._base_url, rel)

        try:
            if self._client is not None and method == "GET" and data is None:
                # pygerrit2 path: only using GET to keep behavior consistent with current usage
                log.debug("Gerrit REST GET via pygerrit2: %s", url)
                # NOTE(review): self._timeout is not applied on this path;
                # confirm whether pygerrit2's get() should receive a timeout.
                return self._client.get("/" + rel)

            # urllib path (non-GET, or pygerrit2 absent)
            headers = {"Accept": "application/json"}
            body_bytes: bytes | None = None
            if data is not None:
                headers["Content-Type"] = "application/json"
                body_bytes = json.dumps(data).encode("utf-8")

            if self._auth is not None:
                token = base64.b64encode(f"{self._auth.user}:{self._auth.password}".encode()).decode("ascii")
                headers["Authorization"] = f"Basic {token}"
            scheme = urllib.parse.urlparse(url).scheme
            if scheme not in ("http", "https"):
                msg_scheme = f"Unsupported URL scheme for Gerrit REST: {scheme}"
                raise GerritRestError(msg_scheme)
            # Encode spaces and other illegal characters; otherwise http.client
            # rejects the request before it is sent.
            req = urllib.request.Request(
                self._quote_url(url), data=body_bytes, method=method, headers=headers
            )
            log.debug("Gerrit REST %s %s (auth_user=%s)", method, url, self._auth.user if self._auth else "")

            with urllib.request.urlopen(req, timeout=self._timeout) as resp:
                content = resp.read()
            # Gerrit prepends ")]}'" in JSON responses to prevent JSON hijacking; strip if present
            text = content.decode("utf-8", errors="replace")
            text = _strip_xssi_guard(text)
            return _json_loads(text)

        except urllib.error.HTTPError as http_exc:
            status = getattr(http_exc, "code", None)
            msg = f"Gerrit REST {method} {url} failed with HTTP {status}"
            log_exception_conditionally(log, msg)
            raise GerritRestError(msg) from http_exc

        except Exception as exc:
            # Errors raised above (bad scheme, JSON parse failure) are also
            # re-wrapped here so the message carries method and URL.
            msg = f"Gerrit REST {method} {url} failed: {exc}"
            log_exception_conditionally(log, msg)
            raise GerritRestError(msg) from exc

    def __repr__(self) -> str:  # pragma: no cover - convenience
        masked = ""
        if self._auth is not None:
            masked = f"{self._auth.user}:{_mask_secret(self._auth.password)}@"
        return f"GerritRestClient(base_url='{self._base_url}', auth='{masked}')"
230
+
231
+
232
+ def _json_loads(s: str) -> Any:
233
+ try:
234
+ return json.loads(s)
235
+ except Exception as exc:
236
+ msg_parse = f"Failed to parse JSON response: {exc}"
237
+ raise GerritRestError(msg_parse) from exc
238
+
239
+
240
+ def _strip_xssi_guard(text: str) -> str:
241
+ # Gerrit typically prefixes JSON with XSSI guard ")]}'"
242
+ # Strip the guard and any trailing newline after it.
243
+ if text.startswith(")]}'"):
244
+ # Common patterns: ")]}'\n" or ")]}'\r\n"
245
+ if text[4:6] == "\r\n":
246
+ return text[6:]
247
+ if text[4:5] == "\n":
248
+ return text[5:]
249
+ return text[4:]
250
+ return text
251
+
252
+
253
+ # Removed _sleep function - using centralized retry framework
254
+
255
+
256
def build_client_for_host(
    host: str,
    *,
    timeout: float = 8.0,
    max_attempts: int = 5,
    http_user: str | None = None,
    http_password: str | None = None,
) -> GerritRestClient:
    """
    Create a GerritRestClient whose base URL comes from the URL builder.

    Credentials are resolved in order, taking the first non-blank value:
    - user: ``http_user``, then GERRIT_HTTP_USER, then GERRIT_SSH_USER_G2G
    - password: ``http_password``, then GERRIT_HTTP_PASSWORD
    Authentication is enabled only when both a user and a password resolve.

    Args:
        host: Gerrit hostname (no scheme)
        timeout: Request timeout in seconds.
        max_attempts: Max retry attempts for transient failures.
        http_user: Optional HTTP user.
        http_password: Optional HTTP password/token.

    Returns:
        Configured GerritRestClient.
    """
    base_url = create_gerrit_url_builder(host).api_url()

    # Resolve the user: explicit argument first, then the environment.
    user = (http_user or "").strip()
    if not user:
        user = os.getenv("GERRIT_HTTP_USER", "").strip()
    if not user:
        user = os.getenv("GERRIT_SSH_USER_G2G", "").strip()

    # Resolve the password the same way.
    passwd = (http_password or "").strip()
    if not passwd:
        passwd = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()

    auth: tuple[str, str] | None = None
    if user and passwd:
        auth = (user, passwd)

    return GerritRestClient(
        base_url=base_url,
        auth=auth,
        timeout=timeout,
        max_attempts=max_attempts,
    )
292
+
293
+
294
+ __all__ = [
295
+ "GerritRestClient",
296
+ "GerritRestError",
297
+ "build_client_for_host",
298
+ ]
@@ -12,11 +12,143 @@ from __future__ import annotations
12
12
 
13
13
  import logging
14
14
  import os
15
+ import urllib.error
16
+ import urllib.parse
17
+ import urllib.request
18
+ from typing import Any
15
19
  from urllib.parse import urljoin
16
20
 
17
21
 
18
22
  log = logging.getLogger(__name__)
19
23
 
24
+ _BASE_PATH_CACHE: dict[str, str] = {}
25
+
26
+
27
+ class _NoRedirect(urllib.request.HTTPRedirectHandler):
28
+ def http_error_301(self, req: Any, fp: Any, code: int, msg: str, headers: Any) -> Any:
29
+ return fp
30
+
31
+ def http_error_302(self, req: Any, fp: Any, code: int, msg: str, headers: Any) -> Any:
32
+ return fp
33
+
34
+ def http_error_303(self, req: Any, fp: Any, code: int, msg: str, headers: Any) -> Any:
35
+ return fp
36
+
37
+ def http_error_307(self, req: Any, fp: Any, code: int, msg: str, headers: Any) -> Any:
38
+ return fp
39
+
40
+ def http_error_308(self, req: Any, fp: Any, code: int, msg: str, headers: Any) -> Any:
41
+ return fp
42
+
43
+
44
+ def _discover_base_path_for_host(host: str, timeout: float = 5.0) -> str:
45
+ """
46
+ Discover Gerrit HTTP base path for the given host by probing redirects.
47
+
48
+ Strategy:
49
+ - Probe '/dashboard/self' and '/' without following redirects.
50
+ - If redirected, infer base path from the first non-endpoint path segment.
51
+ - If no redirect and 200 OK at '/dashboard/self', assume no base path.
52
+ - Cache discovery results per host for the process lifetime.
53
+ """
54
+ try:
55
+ if not host:
56
+ return ""
57
+ cached = _BASE_PATH_CACHE.get(host)
58
+ if cached is not None:
59
+ return cached
60
+
61
+ opener = urllib.request.build_opener(_NoRedirect)
62
+ opener.addheaders = [("User-Agent", "github2gerrit/urls-discovery")]
63
+ probes = ["/dashboard/self", "/"]
64
+ known_endpoints = {
65
+ "changes",
66
+ "accounts",
67
+ "dashboard",
68
+ "c",
69
+ "q",
70
+ "admin",
71
+ "login",
72
+ "settings",
73
+ "plugins",
74
+ "Documentation",
75
+ }
76
+
77
+ for scheme in ("https", "http"):
78
+ for probe in probes:
79
+ url = f"{scheme}://{host}{probe}"
80
+ parsed_url = urllib.parse.urlparse(url)
81
+ if parsed_url.scheme not in ("https", "http"):
82
+ log.debug("Skipping non-HTTP(S) probe URL: %s", url)
83
+ continue
84
+ try:
85
+ resp = opener.open(url, timeout=timeout)
86
+ code = getattr(resp, "getcode", lambda: None)() or getattr(resp, "status", 0)
87
+ # If we reached the page without redirects
88
+ if code == 200:
89
+ _BASE_PATH_CACHE[host] = ""
90
+ log.info("Gerrit base path: ''")
91
+ return ""
92
+ # Handle 3xx responses when redirects are disabled (no-redirect opener)
93
+ if code in (301, 302, 303, 307, 308):
94
+ headers = getattr(resp, "headers", {}) or {}
95
+ loc = headers.get("Location") or headers.get("location") or ""
96
+ if loc:
97
+ # Normalize to absolute path
98
+ parsed = urllib.parse.urlparse(loc)
99
+ path = (
100
+ parsed.path
101
+ if parsed.scheme or parsed.netloc
102
+ else urllib.parse.urlparse(f"https://{host}{loc}").path
103
+ )
104
+ # Determine candidate base path
105
+ segs = [s for s in path.split("/") if s]
106
+ base = ""
107
+ if segs:
108
+ first = segs[0]
109
+ if first not in known_endpoints:
110
+ base = first
111
+ _BASE_PATH_CACHE[host] = base
112
+ log.info("Gerrit base path: '%s'", base)
113
+ return base
114
+ # If we get any other non-redirect response, try next probe
115
+ continue
116
+ except urllib.error.HTTPError as e:
117
+ # HTTPError doubles as the response; capture Location for redirects
118
+ code = e.code
119
+ loc = e.headers.get("Location") or e.headers.get("location") or ""
120
+ if code in (301, 302, 303, 307, 308) and loc:
121
+ # Normalize to absolute path
122
+ parsed = urllib.parse.urlparse(loc)
123
+ path = (
124
+ parsed.path
125
+ if parsed.scheme or parsed.netloc
126
+ else urllib.parse.urlparse(f"https://{host}{loc}").path
127
+ )
128
+ # Determine candidate base path
129
+ segs = [s for s in path.split("/") if s]
130
+ base = ""
131
+ if segs:
132
+ first = segs[0]
133
+ if first not in known_endpoints:
134
+ base = first
135
+ _BASE_PATH_CACHE[host] = base
136
+ log.info("Gerrit base path: '%s'", base)
137
+ return base
138
+ # Non-redirect error; try next probe
139
+ continue
140
+ except Exception as exc:
141
+ log.debug("Gerrit base path probe failed for %s%s: %s", host, probe, exc)
142
+ continue
143
+
144
+ except Exception as exc:
145
+ log.debug("Gerrit base path discovery error for %s: %s", host, exc)
146
+ return ""
147
+ # Default if nothing conclusive after exhausting all probes
148
+ _BASE_PATH_CACHE[host] = ""
149
+ log.info("Gerrit base path: ''")
150
+ return ""
151
+
20
152
 
21
153
  class GerritUrlBuilder:
22
154
  """
@@ -35,7 +167,7 @@ class GerritUrlBuilder:
35
167
  Args:
36
168
  host: Gerrit hostname (without protocol)
37
169
  base_path: Optional base path override. If None, reads from
38
- GERRIT_HTTP_BASE_PATH environment variable.
170
+ GERRIT_HTTP_BASE_PATH environment variable or discovers dynamically.
39
171
  """
40
172
  self.host = host.strip()
41
173
 
@@ -43,7 +175,12 @@ class GerritUrlBuilder:
43
175
  if base_path is not None:
44
176
  self._base_path = base_path.strip().strip("/")
45
177
  else:
46
- self._base_path = os.getenv("GERRIT_HTTP_BASE_PATH", "").strip().strip("/")
178
+ env_bp = os.getenv("GERRIT_HTTP_BASE_PATH", "").strip().strip("/")
179
+ if env_bp:
180
+ self._base_path = env_bp
181
+ else:
182
+ discovered = _discover_base_path_for_host(self.host)
183
+ self._base_path = discovered.strip().strip("/")
47
184
 
48
185
  log.debug(
49
186
  "GerritUrlBuilder initialized for host=%s, base_path='%s'",
@@ -149,73 +286,33 @@ class GerritUrlBuilder:
149
286
 
150
287
  def get_api_url_candidates(self, endpoint: str = "") -> list[str]:
151
288
  """
152
- Get a list of candidate API URLs for fallback scenarios.
289
+ Get the single API URL based on discovered/configured base path.
153
290
 
154
- This method returns URLs in order of preference:
155
- 1. URL with configured base path (if any)
156
- 2. URL with /r/ base path (common fallback)
157
- 3. URL with no base path (root)
291
+ This method avoids hard-coded fallbacks by relying on dynamic detection
292
+ of Gerrit's HTTP base path (or explicit configuration).
158
293
 
159
294
  Args:
160
295
  endpoint: API endpoint path
161
296
 
162
297
  Returns:
163
- List of candidate URLs to try
298
+ A single API URL to use
164
299
  """
165
- candidates = []
166
-
167
- # Primary URL with configured base path
168
- if self.has_base_path:
169
- candidates.append(self.api_url(endpoint))
170
-
171
- # Common fallback: /r/ base path
172
- if self._base_path != "r":
173
- candidates.append(self.api_url(endpoint, base_path_override="r"))
174
-
175
- # Final fallback: no base path
176
- if self.has_base_path:
177
- candidates.append(self.api_url(endpoint, base_path_override=""))
178
-
179
- # If no base path was configured, add the primary URL
180
- if not self.has_base_path:
181
- candidates.append(self.api_url(endpoint))
182
-
183
- return candidates
300
+ return [self.api_url(endpoint)]
184
301
 
185
302
  def get_hook_url_candidates(self, hook_name: str) -> list[str]:
186
303
  """
187
- Get a list of candidate hook URLs for fallback scenarios.
304
+ Get the single hook URL based on discovered/configured base path.
188
305
 
189
- This method returns URLs in order of preference for downloading hooks:
190
- 1. URL with configured base path (if any)
191
- 2. URL with /r/ base path (common for hooks)
192
- 3. URL with no base path (root)
306
+ This method avoids hard-coded fallbacks by relying on dynamic detection
307
+ of Gerrit's HTTP base path (or explicit configuration).
193
308
 
194
309
  Args:
195
310
  hook_name: Name of the hook to download
196
311
 
197
312
  Returns:
198
- List of candidate URLs to try
313
+ A single hook URL to use
199
314
  """
200
- candidates = []
201
-
202
- # Primary URL with configured base path
203
- if self.has_base_path:
204
- candidates.append(self.hook_url(hook_name))
205
-
206
- # Common fallback: /r/ base path (very common for hooks)
207
- if self._base_path != "r":
208
- candidates.append(self.hook_url(hook_name, base_path_override="r"))
209
-
210
- # Final fallback: no base path
211
- if self.has_base_path:
212
- candidates.append(self.hook_url(hook_name, base_path_override=""))
213
-
214
- # If no base path was configured, add the primary URL
215
- if not self.has_base_path:
216
- candidates.append(self.hook_url(hook_name))
217
-
218
- return candidates
315
+ return [self.hook_url(hook_name)]
219
316
 
220
317
  def get_web_base_path(self, base_path_override: str | None = None) -> str:
221
318
  """