github2gerrit 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ import json
14
14
  import logging
15
15
  import os
16
16
  import re
17
+ import urllib.error
17
18
  import urllib.parse
18
19
  import urllib.request
19
20
  from collections.abc import Iterable
@@ -274,42 +275,6 @@ class DuplicateDetector:
274
275
  log.debug("Failed to create Gerrit REST client: %s", exc)
275
276
  return None
276
277
 
277
- def _build_gerrit_rest_client_with_r_path(self, gerrit_host: str) -> object | None:
278
- """Build a Gerrit REST API client with /r/ base path for fallback."""
279
- if GerritRestAPI is None:
280
- return None
281
-
282
- # Create centralized URL builder with /r/ base path override
283
- url_builder = create_gerrit_url_builder(gerrit_host, "r")
284
- fallback_url = url_builder.api_url()
285
-
286
- http_user = os.getenv("GERRIT_HTTP_USER", "").strip() or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
287
- http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
288
-
289
- try:
290
- if http_user and http_pass:
291
- if HTTPBasicAuth is None:
292
- return None
293
- # Type ignore needed for dynamic import returning Any
294
- return GerritRestAPI( # type: ignore[no-any-return]
295
- url=fallback_url, auth=HTTPBasicAuth(http_user, http_pass)
296
- )
297
- else:
298
- # Type ignore needed for dynamic import returning Any
299
- return GerritRestAPI(url=fallback_url) # type: ignore[no-any-return]
300
- except Exception as exc:
301
- log.debug("Failed to create Gerrit REST client with /r/ path: %s", exc)
302
- return None
303
-
304
- def check_gerrit_for_existing_change(self, gh: GitHubContext) -> bool:
305
- """Deprecated: GitHub-Hash/Gerrit REST based duplicate detection disabled.
306
-
307
- Always returns False. Scoring-based duplicate detection will be implemented
308
- in check_for_duplicates.
309
- """
310
- log.debug("Gerrit REST duplicate check disabled")
311
- return False
312
-
313
278
  @staticmethod
314
279
  def _generate_github_change_hash(gh: GitHubContext) -> str:
315
280
  """Generate a deterministic hash for a GitHub PR to identify duplicates.
@@ -460,37 +425,6 @@ class DuplicateDetector:
460
425
  else:
461
426
  return []
462
427
  except urllib.error.HTTPError as exc:
463
- if exc.code == 404:
464
- # Try with /r/ base path fallback using centralized URL builder
465
- fallback_builder = create_gerrit_url_builder(gerrit_host, "r")
466
- fallback_api_base = fallback_builder.api_url().rstrip("/")
467
- fallback_url = url_.replace(api_base, fallback_api_base)
468
- if fallback_url != url_:
469
- log.debug(
470
- "Trying Gerrit query with /r/ base path: %s",
471
- fallback_url,
472
- )
473
- try:
474
- req_fallback = urllib.request.Request(fallback_url, headers=headers)
475
- with urllib.request.urlopen(req_fallback, timeout=8) as resp:
476
- raw = resp.read().decode("utf-8", errors="replace")
477
- # Strip Gerrit's XSSI prefix if present
478
- if raw.startswith(")]}'"):
479
- raw = raw.split("\n", 1)[1] if "\n" in raw else ""
480
- data = json.loads(raw or "[]")
481
- if isinstance(data, list):
482
- # Update successful base path for display URL construction
483
- nonlocal successful_base_path
484
- successful_base_path = "r"
485
- return data
486
- else:
487
- return []
488
- except Exception as fallback_exc:
489
- log.debug(
490
- "Gerrit fallback query also failed for %s: %s",
491
- fallback_url,
492
- fallback_exc,
493
- )
494
428
  log.debug("Gerrit query failed for %s: %s", url_, exc)
495
429
  return []
496
430
  except Exception as exc:
@@ -0,0 +1,518 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: 2025 The Linux Foundation
3
+
4
+ """
5
+ Central external API call framework for github2gerrit.
6
+
7
+ This module provides a unified framework for all external API calls with:
8
+ - Consistent retry logic with exponential backoff and jitter
9
+ - Uniform logging patterns across all API types
10
+ - Metrics collection for timing and success/failure tracking
11
+ - Configurable timeout and retry behavior per API type
12
+
13
+ The framework supports different API types:
14
+ - GitHub API calls
15
+ - Gerrit REST API calls
16
+ - SSH operations (keyscan, git operations)
17
+ - HTTP downloads (curl-based fetches)
18
+
19
+ Design principles:
20
+ - Non-intrusive: wraps existing implementations without breaking changes
21
+ - Configurable: different retry/timeout policies per API type
22
+ - Observable: consistent logging and metrics collection
23
+ - Resilient: handles transient failures with appropriate backoff
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import functools
29
+ import logging
30
+ import random
31
+ import socket
32
+ import time
33
+ import urllib.error
34
+ from collections.abc import Callable
35
+ from dataclasses import dataclass
36
+ from dataclasses import field
37
+ from enum import Enum
38
+ from typing import Any
39
+ from typing import NoReturn
40
+ from typing import TypeVar
41
+
42
+ from .utils import log_exception_conditionally
43
+
44
+
45
# Module logger within the "github2gerrit" logger hierarchy.
log = logging.getLogger("github2gerrit.external_api")

# Error message constants, kept out of raise sites to comply with TRY003.
_MSG_RUNTIME_NO_EXCEPTION = "External API call failed without exception"
_MSG_CURL_FAILED = "curl failed"
_MSG_CURL_NO_OUTPUT = "curl completed but output file was not created"
_MSG_CURL_TIMEOUT = "curl download timed out"
_MSG_CURL_DOWNLOAD_FAILED = "curl download failed"

# Templates that combine the constants above with runtime detail.
_MSG_CURL_FAILED_WITH_RC = "{}: (rc={}): {}"
_MSG_CURL_TIMEOUT_WITH_TIME = "{} after {}s"
_MSG_CURL_DOWNLOAD_FAILED_WITH_EXC = "{}: {}"

# Generic return-type variable for the decorator machinery in this module.
_T = TypeVar("_T")
60
+
61
+
62
class ApiType(Enum):
    """Kinds of external services this framework wraps."""

    GITHUB = "github"
    GERRIT_REST = "gerrit_rest"
    SSH = "ssh"
    HTTP_DOWNLOAD = "http_download"
69
+
70
+
71
@dataclass(frozen=True)
class RetryPolicy:
    """Immutable retry/timeout configuration for external API calls."""

    max_attempts: int = 5
    # Exponential-backoff parameters, in seconds.
    base_delay: float = 0.5
    max_delay: float = 6.0
    # Per-request timeout, in seconds.
    timeout: float = 10.0
    # Fraction of the backoff delay used as the upper bound for random jitter.
    jitter_factor: float = 0.5
80
+
81
+
82
@dataclass
class ApiMetrics:
    """Mutable aggregate counters for calls made through the framework."""

    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    # Cumulative wall-clock seconds across all calls.
    total_duration: float = 0.0
    # Attempts beyond the first, summed over all calls.
    retry_attempts: int = 0
    timeout_errors: int = 0
    transient_errors: int = 0
93
+
94
+
95
@dataclass
class ApiCallContext:
    """Per-call bookkeeping for one external API invocation."""

    api_type: ApiType
    operation: str
    # Target identifier used in log lines (URL, hostname, etc.).
    target: str
    # Current attempt number; updated in place by the retry wrapper.
    attempt: int = 1
    # Wall-clock start of the overall call, used for duration metrics.
    start_time: float = field(default_factory=time.time)
    policy: RetryPolicy = field(default_factory=RetryPolicy)
105
+
106
+
107
# Global metrics storage - in production this could be replaced with
# proper metrics collection system (Prometheus, etc.)
_METRICS: dict[ApiType, ApiMetrics] = {kind: ApiMetrics() for kind in ApiType}
110
+
111
+
112
def get_api_metrics(api_type: ApiType) -> ApiMetrics:
    """Return the live metrics record tracked for *api_type*."""
    return _METRICS[api_type]
115
+
116
+
117
def reset_api_metrics(api_type: ApiType | None = None) -> None:
    """Reset metrics for one API type, or for every type when None.

    Args:
        api_type: The API type to reset, or ``None`` to reset all types.
    """
    # Use a dedicated loop variable to avoid shadowing the parameter
    # (the all-types loop previously rebound ``api_type`` itself).
    targets = [api_type] if api_type is not None else list(ApiType)
    for target in targets:
        _METRICS[target] = ApiMetrics()
124
+
125
+
126
def _calculate_backoff_delay(
    attempt: int,
    base_delay: float,
    max_delay: float,
    jitter_factor: float,
) -> float:
    """Return the sleep time before retry *attempt*, with random jitter.

    The base component doubles with each attempt (capped at *max_delay*);
    a uniform random jitter of up to ``delay * jitter_factor`` is added on
    top to avoid synchronized retry storms.
    """
    exponent = max(attempt - 1, 0)
    capped = min(base_delay * (2.0**exponent), max_delay)
    jitter = random.uniform(0.0, capped * jitter_factor)  # noqa: S311
    return float(capped + jitter)
136
+
137
+
138
+ def _is_transient_error(exc: BaseException, api_type: ApiType) -> bool:
139
+ """Determine if an exception represents a transient error."""
140
+ # Common network/timeout errors
141
+ if isinstance(
142
+ exc,
143
+ socket.timeout
144
+ | TimeoutError
145
+ | ConnectionResetError
146
+ | ConnectionAbortedError
147
+ | BrokenPipeError
148
+ | ConnectionRefusedError,
149
+ ):
150
+ return True
151
+
152
+ # HTTP-specific errors
153
+ if isinstance(exc, urllib.error.HTTPError):
154
+ status = getattr(exc, "code", None)
155
+ # Retry on 5xx and 429 (rate limit)
156
+ return (500 <= status <= 599) or (status == 429) if status else False
157
+
158
+ if isinstance(exc, urllib.error.URLError):
159
+ reason = getattr(exc, "reason", None)
160
+ if isinstance(
161
+ reason,
162
+ socket.timeout | TimeoutError | ConnectionResetError | ConnectionAbortedError,
163
+ ):
164
+ return True
165
+
166
+ # GitHub API specific errors (if PyGithub is available)
167
+ if api_type == ApiType.GITHUB:
168
+ # Import GitHub exception types to check isinstance
169
+ try:
170
+ from .github_api import GithubExceptionType
171
+ from .github_api import RateLimitExceededExceptionType
172
+ except ImportError:
173
+ GithubExceptionType = type(None) # type: ignore[misc,assignment]
174
+ RateLimitExceededExceptionType = type(None) # type: ignore[misc,assignment]
175
+
176
+ # Check by class name or isinstance for mock/test exceptions
177
+ exc_name = exc.__class__.__name__
178
+ if exc_name in ("RateLimitExceededException", "RateLimitExceededExceptionType") or isinstance(
179
+ exc, RateLimitExceededExceptionType
180
+ ):
181
+ return True
182
+ if exc_name in ("GithubException", "GithubExceptionType") or isinstance(exc, GithubExceptionType):
183
+ status = getattr(exc, "status", None)
184
+ if isinstance(status, int) and 500 <= status <= 599:
185
+ return True
186
+ # Check for rate limit in error data
187
+ data = getattr(exc, "data", "")
188
+ if isinstance(data, str | bytes):
189
+ try:
190
+ text = data.decode("utf-8") if isinstance(data, bytes) else data
191
+ if "rate limit" in text.lower():
192
+ return True
193
+ except Exception:
194
+ # Ignore decode errors when checking for rate limit text
195
+ log.debug("Failed to decode GitHub API error data for rate limit check")
196
+ return False
197
+
198
+ # Gerrit REST specific errors - check for wrapped HTTP errors
199
+ if api_type == ApiType.GERRIT_REST:
200
+ # Handle GerritRestError that wraps HTTP errors
201
+ if "HTTP 5" in str(exc) or "HTTP 429" in str(exc):
202
+ return True
203
+ # Also check for original HTTP errors that caused the GerritRestError
204
+ if hasattr(exc, "__cause__") and isinstance(exc.__cause__, urllib.error.HTTPError):
205
+ status = getattr(exc.__cause__, "code", None)
206
+ return (500 <= status <= 599) or (status == 429) if status else False
207
+
208
+ # SSH/Git command errors - check stderr for common transient messages
209
+ if api_type == ApiType.SSH:
210
+ msg = str(exc).lower()
211
+ transient_patterns = [
212
+ "connection timed out",
213
+ "connection refused",
214
+ "temporarily unavailable",
215
+ "network is unreachable",
216
+ "host key verification failed", # May be transient during discovery
217
+ "broken pipe",
218
+ "connection reset",
219
+ ]
220
+ return any(pattern in msg for pattern in transient_patterns)
221
+
222
+ # String-based detection for other error types
223
+ msg = str(exc).lower()
224
+ transient_substrings = [
225
+ "timed out",
226
+ "temporarily unavailable",
227
+ "temporary failure",
228
+ "connection reset",
229
+ "connection aborted",
230
+ "broken pipe",
231
+ "connection refused",
232
+ "bad gateway",
233
+ "service unavailable",
234
+ "gateway timeout",
235
+ "rate limit",
236
+ ]
237
+ return any(substring in msg for substring in transient_substrings)
238
+
239
+
240
def _update_metrics(
    api_type: ApiType,
    context: ApiCallContext,
    success: bool,
    exc: BaseException | None = None,
) -> None:
    """Record the outcome of one API call in the global metrics table."""
    record = _METRICS[api_type]
    elapsed = time.time() - context.start_time

    record.total_calls += 1
    record.total_duration += elapsed
    if success:
        record.successful_calls += 1
    else:
        record.failed_calls += 1

    # Attempts beyond the first count as retries.
    if context.attempt > 1:
        record.retry_attempts += context.attempt - 1

    if exc:
        if isinstance(exc, socket.timeout | TimeoutError):
            record.timeout_errors += 1
        elif _is_transient_error(exc, api_type):
            record.transient_errors += 1
265
+
266
+
267
def external_api_call(
    api_type: ApiType,
    operation: str,
    *,
    policy: RetryPolicy | None = None,
    target: str = "",
) -> Callable[[Callable[..., _T]], Callable[..., _T]]:
    """
    Decorate an external API call with unified retry, logging, and metrics.

    Args:
        api_type: Which external service the wrapped function talks to.
        operation: Short operation label used in log lines
            (e.g. "get_pull_request").
        policy: Retry policy; when omitted, a per-API-type default is used.
        target: Target identifier (URL, hostname, etc.) for logging.

    Returns:
        A decorator that wraps the function with retry/logging/metrics.

    Example:
        @external_api_call(ApiType.GITHUB, "get_pull_request")
        def get_pull_request(repo, number):
            return repo.get_pull(number)
    """
    if policy is None:
        # Sensible defaults tuned per API type.
        defaults_by_type = {
            ApiType.GITHUB: RetryPolicy(max_attempts=5, timeout=10.0),
            ApiType.GERRIT_REST: RetryPolicy(max_attempts=5, timeout=8.0),
            ApiType.SSH: RetryPolicy(max_attempts=3, timeout=15.0),
            ApiType.HTTP_DOWNLOAD: RetryPolicy(max_attempts=3, timeout=30.0),
        }
        policy = defaults_by_type.get(api_type, RetryPolicy())

    def decorator(func: Callable[..., _T]) -> Callable[..., _T]:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> _T:
            ctx = ApiCallContext(
                api_type=api_type,
                operation=operation,
                target=target,
                policy=policy,
            )
            final_exc: BaseException | None = None

            attempt = 0
            while attempt < policy.max_attempts:
                attempt += 1
                ctx.attempt = attempt
                try:
                    log.debug(
                        "[%s] %s attempt %d/%d: %s %s",
                        api_type.value,
                        operation,
                        attempt,
                        policy.max_attempts,
                        target,
                        f"(timeout={policy.timeout}s)" if policy.timeout else "",
                    )
                    # Invoke the wrapped call.
                    outcome = func(*args, **kwargs)
                except BaseException as exc:
                    # BaseException so even SystemExit/KeyboardInterrupt are
                    # recorded in metrics before being re-raised below.
                    final_exc = exc
                    elapsed = time.time() - ctx.start_time
                    retryable = _is_transient_error(exc, api_type)
                    last_try = attempt == policy.max_attempts
                    if retryable and not last_try:
                        pause = _calculate_backoff_delay(
                            attempt,
                            policy.base_delay,
                            policy.max_delay,
                            policy.jitter_factor,
                        )
                        log.warning(
                            "[%s] %s attempt %d/%d failed (%.2fs): %s; retrying in %.2fs",
                            api_type.value,
                            operation,
                            attempt,
                            policy.max_attempts,
                            elapsed,
                            exc,
                            pause,
                        )
                        time.sleep(pause)
                        continue
                    # Terminal failure: log once, record metrics, re-raise.
                    why = "final attempt" if last_try else "non-retryable"
                    log_exception_conditionally(
                        log,
                        f"[{api_type.value}] {operation} failed ({why}) "
                        f"after {attempt} attempt(s) in {elapsed:.2f}s: {target}",
                    )
                    _update_metrics(api_type, ctx, success=False, exc=exc)
                    raise
                else:
                    # Success path: log timing and record metrics.
                    elapsed = time.time() - ctx.start_time
                    log.debug(
                        "[%s] %s succeeded in %.2fs: %s",
                        api_type.value,
                        operation,
                        elapsed,
                        target,
                    )
                    _update_metrics(api_type, ctx, success=True)
                    return outcome

            # Defensive: normally unreachable, since every attempt either
            # returns or raises above (only max_attempts < 1 lands here).
            if final_exc:
                _update_metrics(api_type, ctx, success=False, exc=final_exc)
                raise final_exc
            raise RuntimeError(_MSG_RUNTIME_NO_EXCEPTION + f": {operation}")

        return wrapper

    return decorator
393
+
394
+
395
def log_api_metrics_summary() -> None:
    """Log a one-line summary for each API type that has recorded calls."""
    log.info("=== External API Metrics Summary ===")
    for api_type in ApiType:
        metrics = _METRICS[api_type]
        if metrics.total_calls == 0:
            # Nothing to report for this API type.
            continue

        # total_calls > 0 is guaranteed past the guard above, so the
        # previous inline `if metrics.total_calls > 0 else ...` fallbacks
        # were dead code; plain division is safe here.
        success_rate = metrics.successful_calls / metrics.total_calls * 100
        avg_duration = metrics.total_duration / metrics.total_calls

        log.info(
            "[%s] Calls: %d, Success: %.1f%%, Avg Duration: %.2fs, Retries: %d, Timeouts: %d, Transient Errors: %d",
            api_type.value,
            metrics.total_calls,
            success_rate,
            avg_duration,
            metrics.retry_attempts,
            metrics.timeout_errors,
            metrics.transient_errors,
        )
416
+
417
+
418
def curl_download(
    url: str,
    output_path: str,
    *,
    timeout: float = 30.0,
    follow_redirects: bool = True,
    silent: bool = True,
    policy: RetryPolicy | None = None,
) -> tuple[int, str]:
    """
    Download a file using curl with centralized retry/logging/metrics.

    Args:
        url: URL to download from
        output_path: Local path to save the file
        timeout: Request timeout in seconds
        follow_redirects: Whether to follow HTTP redirects
        silent: Whether to suppress curl progress output
        policy: Custom retry policy

    Returns:
        Tuple of (return_code, http_status_code)

    Raises:
        RuntimeError: If curl exits non-zero, produces no output file, or
            the subprocess fails unexpectedly, after retries are exhausted.
        TimeoutError: If the download exceeds the timeout after retries.
    """
    import subprocess
    from pathlib import Path

    if policy is None:
        policy = RetryPolicy(max_attempts=3, timeout=timeout)

    @external_api_call(ApiType.HTTP_DOWNLOAD, "curl_download", target=url, policy=policy)
    def _do_curl() -> tuple[int, str]:
        # Build the curl invocation: -f returns a non-zero exit code on HTTP
        # errors; -L follows redirects; -sS hides progress but keeps errors.
        cmd = ["curl", "-fL" if follow_redirects else "-f"]
        if silent:
            cmd.append("-sS")
        # Write the HTTP status code to stdout so we can report it.
        cmd.extend(["-w", "%{http_code}"])
        cmd.extend(["--max-time", str(int(timeout))])
        cmd.extend(["-o", output_path])
        # URL goes last.
        cmd.append(url)

        # Keep the try body limited to the subprocess call so that the
        # RuntimeErrors raised below are NOT re-caught and double-wrapped
        # (previously "curl failed" errors were re-wrapped by a broad
        # `except Exception` into "curl download failed: curl failed: ...").
        try:
            result = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout + 5,  # give the subprocess a bit longer than curl
                check=False,
            )
        except subprocess.TimeoutExpired:
            raise TimeoutError(_MSG_CURL_TIMEOUT_WITH_TIME.format(_MSG_CURL_TIMEOUT, timeout))
        except Exception as exc:
            # Unexpected subprocess failure (e.g. curl binary missing).
            raise RuntimeError(_MSG_CURL_DOWNLOAD_FAILED_WITH_EXC.format(_MSG_CURL_DOWNLOAD_FAILED, exc)) from exc

        # Extract the HTTP status code written by -w above.
        http_status = result.stdout.strip() if result.stdout else "unknown"

        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else _MSG_CURL_FAILED
            raise RuntimeError(_MSG_CURL_FAILED_WITH_RC.format(_MSG_CURL_FAILED, result.returncode, error_msg))

        # Verify the output file was actually created.
        if not Path(output_path).exists():
            raise RuntimeError(_MSG_CURL_NO_OUTPUT)

        return result.returncode, http_status

    return _do_curl()