github2gerrit 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ import json
14
14
  import logging
15
15
  import os
16
16
  import re
17
+ import urllib.error
17
18
  import urllib.parse
18
19
  import urllib.request
19
20
  from collections.abc import Iterable
@@ -274,42 +275,6 @@ class DuplicateDetector:
274
275
  log.debug("Failed to create Gerrit REST client: %s", exc)
275
276
  return None
276
277
 
277
- def _build_gerrit_rest_client_with_r_path(self, gerrit_host: str) -> object | None:
278
- """Build a Gerrit REST API client with /r/ base path for fallback."""
279
- if GerritRestAPI is None:
280
- return None
281
-
282
- # Create centralized URL builder with /r/ base path override
283
- url_builder = create_gerrit_url_builder(gerrit_host, "r")
284
- fallback_url = url_builder.api_url()
285
-
286
- http_user = os.getenv("GERRIT_HTTP_USER", "").strip() or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
287
- http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
288
-
289
- try:
290
- if http_user and http_pass:
291
- if HTTPBasicAuth is None:
292
- return None
293
- # Type ignore needed for dynamic import returning Any
294
- return GerritRestAPI( # type: ignore[no-any-return]
295
- url=fallback_url, auth=HTTPBasicAuth(http_user, http_pass)
296
- )
297
- else:
298
- # Type ignore needed for dynamic import returning Any
299
- return GerritRestAPI(url=fallback_url) # type: ignore[no-any-return]
300
- except Exception as exc:
301
- log.debug("Failed to create Gerrit REST client with /r/ path: %s", exc)
302
- return None
303
-
304
- def check_gerrit_for_existing_change(self, gh: GitHubContext) -> bool:
305
- """Deprecated: GitHub-Hash/Gerrit REST based duplicate detection disabled.
306
-
307
- Always returns False. Scoring-based duplicate detection will be implemented
308
- in check_for_duplicates.
309
- """
310
- log.debug("Gerrit REST duplicate check disabled")
311
- return False
312
-
313
278
  @staticmethod
314
279
  def _generate_github_change_hash(gh: GitHubContext) -> str:
315
280
  """Generate a deterministic hash for a GitHub PR to identify duplicates.
@@ -324,8 +289,6 @@ class DuplicateDetector:
324
289
  Returns:
325
290
  Hex-encoded SHA256 hash string (first 16 characters for readability)
326
291
  """
327
- import hashlib
328
-
329
292
  # Build hash input from stable, unique PR identifiers
330
293
  # Use server_url + repository + pr_number for global uniqueness
331
294
  hash_input = f"{gh.server_url}/{gh.repository}/pull/{gh.pr_number}"
@@ -460,37 +423,6 @@ class DuplicateDetector:
460
423
  else:
461
424
  return []
462
425
  except urllib.error.HTTPError as exc:
463
- if exc.code == 404:
464
- # Try with /r/ base path fallback using centralized URL builder
465
- fallback_builder = create_gerrit_url_builder(gerrit_host, "r")
466
- fallback_api_base = fallback_builder.api_url().rstrip("/")
467
- fallback_url = url_.replace(api_base, fallback_api_base)
468
- if fallback_url != url_:
469
- log.debug(
470
- "Trying Gerrit query with /r/ base path: %s",
471
- fallback_url,
472
- )
473
- try:
474
- req_fallback = urllib.request.Request(fallback_url, headers=headers)
475
- with urllib.request.urlopen(req_fallback, timeout=8) as resp:
476
- raw = resp.read().decode("utf-8", errors="replace")
477
- # Strip Gerrit's XSSI prefix if present
478
- if raw.startswith(")]}'"):
479
- raw = raw.split("\n", 1)[1] if "\n" in raw else ""
480
- data = json.loads(raw or "[]")
481
- if isinstance(data, list):
482
- # Update successful base path for display URL construction
483
- nonlocal successful_base_path
484
- successful_base_path = "r"
485
- return data
486
- else:
487
- return []
488
- except Exception as fallback_exc:
489
- log.debug(
490
- "Gerrit fallback query also failed for %s: %s",
491
- fallback_url,
492
- fallback_exc,
493
- )
494
426
  log.debug("Gerrit query failed for %s: %s", url_, exc)
495
427
  return []
496
428
  except Exception as exc:
@@ -0,0 +1,517 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: 2025 The Linux Foundation
3
+
4
+ """
5
+ Central external API call framework for github2gerrit.
6
+
7
+ This module provides a unified framework for all external API calls with:
8
+ - Consistent retry logic with exponential backoff and jitter
9
+ - Uniform logging patterns across all API types
10
+ - Metrics collection for timing and success/failure tracking
11
+ - Configurable timeout and retry behavior per API type
12
+
13
+ The framework supports different API types:
14
+ - GitHub API calls
15
+ - Gerrit REST API calls
16
+ - SSH operations (keyscan, git operations)
17
+ - HTTP downloads (curl-based fetches)
18
+
19
+ Design principles:
20
+ - Non-intrusive: wraps existing implementations without breaking changes
21
+ - Configurable: different retry/timeout policies per API type
22
+ - Observable: consistent logging and metrics collection
23
+ - Resilient: handles transient failures with appropriate backoff
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import functools
29
+ import logging
30
+ import random
31
+ import socket
32
+ import subprocess
33
+ import time
34
+ import urllib.error
35
+ from collections.abc import Callable
36
+ from dataclasses import dataclass
37
+ from dataclasses import field
38
+ from enum import Enum
39
+ from pathlib import Path
40
+ from typing import Any
41
+ from typing import NoReturn
42
+ from typing import TypeVar
43
+
44
+ from .utils import log_exception_conditionally
45
+
46
+
47
log = logging.getLogger("github2gerrit.external_api")

# Error message constants to comply with TRY003 (keep long messages out of
# the raise sites; the raise helpers below format these templates).
_MSG_RUNTIME_NO_EXCEPTION = "External API call failed without exception"
_MSG_CURL_FAILED = "curl failed"
_MSG_CURL_NO_OUTPUT = "curl completed but output file was not created"
_MSG_CURL_TIMEOUT = "curl download timed out"
_MSG_CURL_DOWNLOAD_FAILED = "curl download failed"

# Complete error message templates (filled via str.format in curl_download)
_MSG_CURL_FAILED_WITH_RC = "{}: (rc={}): {}"
_MSG_CURL_TIMEOUT_WITH_TIME = "{} after {}s"
_MSG_CURL_DOWNLOAD_FAILED_WITH_EXC = "{}: {}"

# Return-type variable threaded through the external_api_call decorator so
# wrapped functions keep their original return type.
_T = TypeVar("_T")
62
+
63
+
64
class ApiType(Enum):
    """Types of external APIs supported by the framework."""

    GITHUB = "github"  # GitHub API calls
    GERRIT_REST = "gerrit_rest"  # Gerrit REST API calls
    SSH = "ssh"  # SSH operations (keyscan, git operations)
    HTTP_DOWNLOAD = "http_download"  # curl-based HTTP fetches
71
+
72
+
73
@dataclass(frozen=True)
class RetryPolicy:
    """Configuration for retry behavior of external API calls."""

    # Total number of attempts, including the first call (not just retries).
    max_attempts: int = 5
    # Base backoff in seconds; doubles per attempt in _calculate_backoff_delay.
    base_delay: float = 0.5
    # Cap on the exponential backoff delay, applied before jitter is added.
    max_delay: float = 6.0
    # Per-call timeout in seconds; shown in attempt logs — presumably the
    # wrapped callable enforces it itself (TODO confirm against callers).
    timeout: float = 10.0
    # Max jitter is delay * jitter_factor, drawn uniformly at random.
    jitter_factor: float = 0.5
82
+
83
+
84
@dataclass
class ApiMetrics:
    """Metrics collected for external API calls."""

    total_calls: int = 0  # every finished call, successful or not
    successful_calls: int = 0
    failed_calls: int = 0
    # Cumulative wall-clock seconds, measured from the first attempt's start,
    # so retries and backoff sleeps are included.
    total_duration: float = 0.0
    retry_attempts: int = 0  # attempts beyond the first, summed over calls
    timeout_errors: int = 0  # socket.timeout / TimeoutError failures
    transient_errors: int = 0  # other failures classified as transient
95
+
96
+
97
@dataclass
class ApiCallContext:
    """Context information for an external API call."""

    api_type: ApiType  # which external service this call targets
    operation: str  # short operation label, e.g. "get_pull_request"
    target: str  # URL, hostname, etc.
    attempt: int = 1  # current 1-based attempt; updated by the retry loop
    # Start of the FIRST attempt; durations derived from this span all
    # retries (including backoff sleeps), not just the latest attempt.
    start_time: float = field(default_factory=time.time)
    policy: RetryPolicy = field(default_factory=RetryPolicy)  # effective policy
107
+
108
+
109
# Global metrics storage - in production this could be replaced with
# proper metrics collection system (Prometheus, etc.)
# One mutable ApiMetrics record per ApiType; mutated in place by
# _update_metrics() and replaced wholesale by reset_api_metrics().
_METRICS: dict[ApiType, ApiMetrics] = {api_type: ApiMetrics() for api_type in ApiType}
112
+
113
+
114
def get_api_metrics(api_type: ApiType) -> ApiMetrics:
    """Get metrics for a specific API type.

    Returns the live (mutable) ApiMetrics record from the module-level
    registry, not a copy; callers should treat it as read-only.
    """
    return _METRICS[api_type]
117
+
118
+
119
def reset_api_metrics(api_type: ApiType | None = None) -> None:
    """Reset metrics for a specific API type or all types.

    Args:
        api_type: The API type whose metrics should be reset, or None to
            reset the metrics of every API type.
    """
    if api_type is not None:
        _METRICS[api_type] = ApiMetrics()
    else:
        # Use a distinct loop variable so the ``api_type`` parameter is not
        # shadowed (the original reused the parameter name here).
        for each_type in ApiType:
            _METRICS[each_type] = ApiMetrics()
126
+
127
+
128
+ def _calculate_backoff_delay(
129
+ attempt: int,
130
+ base_delay: float,
131
+ max_delay: float,
132
+ jitter_factor: float,
133
+ ) -> float:
134
+ """Calculate exponential backoff delay with jitter."""
135
+ delay = min(base_delay * (2 ** max(0, attempt - 1)), max_delay)
136
+ jitter = random.uniform(0.0, delay * jitter_factor) # noqa: S311
137
+ return float(delay + jitter)
138
+
139
+
140
+ def _is_transient_error(exc: BaseException, api_type: ApiType) -> bool:
141
+ """Determine if an exception represents a transient error."""
142
+ # Common network/timeout errors
143
+ if isinstance(
144
+ exc,
145
+ socket.timeout
146
+ | TimeoutError
147
+ | ConnectionResetError
148
+ | ConnectionAbortedError
149
+ | BrokenPipeError
150
+ | ConnectionRefusedError,
151
+ ):
152
+ return True
153
+
154
+ # HTTP-specific errors
155
+ if isinstance(exc, urllib.error.HTTPError):
156
+ status = getattr(exc, "code", None)
157
+ # Retry on 5xx and 429 (rate limit)
158
+ return (500 <= status <= 599) or (status == 429) if status else False
159
+
160
+ if isinstance(exc, urllib.error.URLError):
161
+ reason = getattr(exc, "reason", None)
162
+ if isinstance(
163
+ reason,
164
+ socket.timeout | TimeoutError | ConnectionResetError | ConnectionAbortedError,
165
+ ):
166
+ return True
167
+
168
+ # GitHub API specific errors (if PyGithub is available)
169
+ if api_type == ApiType.GITHUB:
170
+ # Import GitHub exception types to check isinstance
171
+ try:
172
+ from .github_api import GithubExceptionType
173
+ from .github_api import RateLimitExceededExceptionType
174
+ except ImportError:
175
+ GithubExceptionType = type(None) # type: ignore[misc,assignment]
176
+ RateLimitExceededExceptionType = type(None) # type: ignore[misc,assignment]
177
+
178
+ # Check by class name or isinstance for mock/test exceptions
179
+ exc_name = exc.__class__.__name__
180
+ if exc_name in ("RateLimitExceededException", "RateLimitExceededExceptionType") or isinstance(
181
+ exc, RateLimitExceededExceptionType
182
+ ):
183
+ return True
184
+ if exc_name in ("GithubException", "GithubExceptionType") or isinstance(exc, GithubExceptionType):
185
+ status = getattr(exc, "status", None)
186
+ if isinstance(status, int) and 500 <= status <= 599:
187
+ return True
188
+ # Check for rate limit in error data
189
+ data = getattr(exc, "data", "")
190
+ if isinstance(data, str | bytes):
191
+ try:
192
+ text = data.decode("utf-8") if isinstance(data, bytes) else data
193
+ if "rate limit" in text.lower():
194
+ return True
195
+ except Exception:
196
+ # Ignore decode errors when checking for rate limit text
197
+ log.debug("Failed to decode GitHub API error data for rate limit check")
198
+ return False
199
+
200
+ # Gerrit REST specific errors - check for wrapped HTTP errors
201
+ if api_type == ApiType.GERRIT_REST:
202
+ # Handle GerritRestError that wraps HTTP errors
203
+ if "HTTP 5" in str(exc) or "HTTP 429" in str(exc):
204
+ return True
205
+ # Also check for original HTTP errors that caused the GerritRestError
206
+ if hasattr(exc, "__cause__") and isinstance(exc.__cause__, urllib.error.HTTPError):
207
+ status = getattr(exc.__cause__, "code", None)
208
+ return (500 <= status <= 599) or (status == 429) if status else False
209
+
210
+ # SSH/Git command errors - check stderr for common transient messages
211
+ if api_type == ApiType.SSH:
212
+ msg = str(exc).lower()
213
+ transient_patterns = [
214
+ "connection timed out",
215
+ "connection refused",
216
+ "temporarily unavailable",
217
+ "network is unreachable",
218
+ "host key verification failed", # May be transient during discovery
219
+ "broken pipe",
220
+ "connection reset",
221
+ ]
222
+ return any(pattern in msg for pattern in transient_patterns)
223
+
224
+ # String-based detection for other error types
225
+ msg = str(exc).lower()
226
+ transient_substrings = [
227
+ "timed out",
228
+ "temporarily unavailable",
229
+ "temporary failure",
230
+ "connection reset",
231
+ "connection aborted",
232
+ "broken pipe",
233
+ "connection refused",
234
+ "bad gateway",
235
+ "service unavailable",
236
+ "gateway timeout",
237
+ "rate limit",
238
+ ]
239
+ return any(substring in msg for substring in transient_substrings)
240
+
241
+
242
def _update_metrics(
    api_type: ApiType,
    context: ApiCallContext,
    success: bool,
    exc: BaseException | None = None,
) -> None:
    """Record the outcome of one finished API call in the per-type metrics."""
    stats = _METRICS[api_type]
    elapsed = time.time() - context.start_time

    stats.total_calls += 1
    stats.total_duration += elapsed
    if success:
        stats.successful_calls += 1
    else:
        stats.failed_calls += 1

    # Only attempts beyond the first count as retries.
    retries = context.attempt - 1
    if retries > 0:
        stats.retry_attempts += retries

    if not exc:
        return
    if isinstance(exc, socket.timeout | TimeoutError):
        stats.timeout_errors += 1
    elif _is_transient_error(exc, api_type):
        stats.transient_errors += 1
267
+
268
+
269
def external_api_call(
    api_type: ApiType,
    operation: str,
    *,
    policy: RetryPolicy | None = None,
    target: str = "",
) -> Callable[[Callable[..., _T]], Callable[..., _T]]:
    """
    Decorator for external API calls with unified retry/logging/metrics.

    Transient failures (as classified by _is_transient_error) are retried
    with exponential backoff and jitter; non-retryable or final-attempt
    failures are logged, counted in metrics, and re-raised unchanged.

    Args:
        api_type: Type of external API being called
        operation: Description of the operation (e.g., "get_pull_request")
        policy: Custom retry policy, uses default if not provided
        target: Target identifier (URL, hostname, etc.) for logging

    Returns:
        Decorated function with retry/logging/metrics capabilities

    Example:
        @external_api_call(ApiType.GITHUB, "get_pull_request")
        def get_pull_request(repo, number):
            return repo.get_pull(number)
    """
    if policy is None:
        # Default policies per API type; unknown types fall back to RetryPolicy().
        default_policies = {
            ApiType.GITHUB: RetryPolicy(max_attempts=5, timeout=10.0),
            ApiType.GERRIT_REST: RetryPolicy(max_attempts=5, timeout=8.0),
            ApiType.SSH: RetryPolicy(max_attempts=3, timeout=15.0),
            ApiType.HTTP_DOWNLOAD: RetryPolicy(max_attempts=3, timeout=30.0),
        }
        policy = default_policies.get(api_type, RetryPolicy())

    def decorator(func: Callable[..., _T]) -> Callable[..., _T]:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> _T:
            # The context (and its start_time) is created once per call, so
            # logged durations and metrics are cumulative across retries,
            # including backoff sleeps — not per-attempt timings.
            context = ApiCallContext(
                api_type=api_type,
                operation=operation,
                target=target,
                policy=policy,
            )

            last_exc: BaseException | None = None

            for attempt in range(1, policy.max_attempts + 1):
                context.attempt = attempt

                try:
                    log.debug(
                        "[%s] %s attempt %d/%d: %s %s",
                        api_type.value,
                        operation,
                        attempt,
                        policy.max_attempts,
                        target,
                        f"(timeout={policy.timeout}s)" if policy.timeout else "",
                    )

                    # Call the actual function
                    result = func(*args, **kwargs)
                # NOTE(review): BaseException is intercepted here, so
                # KeyboardInterrupt/SystemExit are classified, counted, and
                # logged before being re-raised — confirm this is intentional.
                except BaseException as exc:
                    last_exc = exc
                    duration = time.time() - context.start_time

                    # Determine if this error should be retried
                    is_transient = _is_transient_error(exc, api_type)
                    is_final_attempt = attempt == policy.max_attempts

                    if is_transient and not is_final_attempt:
                        # Retry case: sleep with exponential backoff + jitter,
                        # then fall through to the next loop iteration.
                        delay = _calculate_backoff_delay(
                            attempt,
                            policy.base_delay,
                            policy.max_delay,
                            policy.jitter_factor,
                        )
                        log.warning(
                            "[%s] %s attempt %d/%d failed (%.2fs): %s; retrying in %.2fs",
                            api_type.value,
                            operation,
                            attempt,
                            policy.max_attempts,
                            duration,
                            exc,
                            delay,
                        )
                        time.sleep(delay)
                        continue
                    # Final failure - log and re-raise the original exception.
                    reason = "final attempt" if is_final_attempt else "non-retryable"
                    log_exception_conditionally(
                        log,
                        f"[{api_type.value}] {operation} failed ({reason}) "
                        f"after {attempt} attempt(s) in {duration:.2f}s: {target}",
                    )
                    _update_metrics(api_type, context, success=False, exc=exc)
                    raise
                else:
                    # Success - log and update metrics
                    duration = time.time() - context.start_time
                    log.debug(
                        "[%s] %s succeeded in %.2fs: %s",
                        api_type.value,
                        operation,
                        duration,
                        target,
                    )
                    _update_metrics(api_type, context, success=True)
                    return result

            # Defensive tail: every loop iteration either returns, raises, or
            # continues, so this should be unreachable; handle it gracefully.
            if last_exc:
                _update_metrics(api_type, context, success=False, exc=last_exc)
                raise last_exc

            # Helper function for raising runtime error (TRY301 compliance)
            def _raise_no_exception() -> NoReturn:
                raise RuntimeError(_MSG_RUNTIME_NO_EXCEPTION + f": {operation}")

            _raise_no_exception()

        return wrapper

    return decorator
395
+
396
+
397
def log_api_metrics_summary() -> None:
    """Log a one-line summary of collected metrics for every API type."""
    log.info("=== External API Metrics Summary ===")
    for api_type in ApiType:
        stats = _METRICS[api_type]
        total = stats.total_calls
        if total == 0:
            # No calls recorded for this API type; omit its summary line.
            continue

        pct_success = (stats.successful_calls / total * 100) if total > 0 else 0.0
        mean_duration = stats.total_duration / total if total > 0 else 0.0

        log.info(
            "[%s] Calls: %d, Success: %.1f%%, Avg Duration: %.2fs, Retries: %d, Timeouts: %d, Transient Errors: %d",
            api_type.value,
            total,
            pct_success,
            mean_duration,
            stats.retry_attempts,
            stats.timeout_errors,
            stats.transient_errors,
        )
418
+
419
+
420
def curl_download(
    url: str,
    output_path: str,
    *,
    timeout: float = 30.0,
    follow_redirects: bool = True,
    silent: bool = True,
    policy: RetryPolicy | None = None,
) -> tuple[int, str]:
    """
    Download a file using curl with centralized retry/logging/metrics.

    Args:
        url: URL to download from
        output_path: Local path to save the file
        timeout: Request timeout in seconds
        follow_redirects: Whether to follow HTTP redirects
        silent: Whether to suppress curl progress output
        policy: Custom retry policy

    Returns:
        Tuple of (return_code, http_status_code)

    Raises:
        TimeoutError: If the curl process exceeds the timeout.
        RuntimeError: If curl fails, cannot be executed, or produces no
            output file, after retries are exhausted.
    """
    if policy is None:
        policy = RetryPolicy(max_attempts=3, timeout=timeout)

    @external_api_call(ApiType.HTTP_DOWNLOAD, "curl_download", target=url, policy=policy)
    def _do_curl() -> tuple[int, str]:
        # Build the curl command line.  -f makes curl exit non-zero on HTTP
        # errors; -L additionally follows redirects.
        cmd = ["curl", "-fL" if follow_redirects else "-f"]

        if silent:
            # -sS silences progress output but keeps error messages.
            cmd.append("-sS")

        # Write HTTP status code to stdout so it can be captured
        cmd.extend(["-w", "%{http_code}"])

        # Set timeout
        cmd.extend(["--max-time", str(int(timeout))])

        # Output file
        cmd.extend(["-o", output_path])

        # URL (last argument)
        cmd.append(url)

        # Helper functions for raising errors to comply with TRY301;
        # annotated NoReturn so type checkers know execution stops here.
        def _raise_curl_failed(returncode: int, error_msg: str) -> NoReturn:
            raise RuntimeError(_MSG_CURL_FAILED_WITH_RC.format(_MSG_CURL_FAILED, returncode, error_msg))

        def _raise_no_output() -> NoReturn:
            raise RuntimeError(_MSG_CURL_NO_OUTPUT)

        def _raise_timeout(timeout_val: float) -> NoReturn:
            raise TimeoutError(_MSG_CURL_TIMEOUT_WITH_TIME.format(_MSG_CURL_TIMEOUT, timeout_val))

        def _raise_download_failed(exc: Exception) -> NoReturn:
            raise RuntimeError(_MSG_CURL_DOWNLOAD_FAILED_WITH_EXC.format(_MSG_CURL_DOWNLOAD_FAILED, exc)) from exc

        # Keep the try body minimal: only the subprocess invocation can
        # legitimately raise here.  The previous version wrapped the whole
        # validation in this try, so its own _raise_curl_failed /
        # _raise_no_output RuntimeErrors were caught by "except Exception"
        # and double-wrapped as "curl download failed: curl failed: ...",
        # obscuring the root cause.
        try:
            result = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout + 5,  # Give subprocess a bit more time than curl
                check=False,
            )
        except subprocess.TimeoutExpired:
            _raise_timeout(timeout)
        except Exception as exc:
            _raise_download_failed(exc)

        # Extract HTTP status code from stdout (written there by -w)
        http_status = result.stdout.strip() if result.stdout else "unknown"

        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else _MSG_CURL_FAILED
            _raise_curl_failed(result.returncode, error_msg)

        # Verify file was created
        if not Path(output_path).exists():
            _raise_no_output()

        return result.returncode, http_status

    return _do_curl()