airbyte-source-github 1.7.13__tar.gz → 1.8.0__tar.gz

This diff reflects the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and shows the changes between the package versions as they appear in their respective public registries.
Files changed (63)
  1. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/PKG-INFO +2 -2
  2. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/pyproject.toml +2 -2
  3. airbyte_source_github-1.8.0/source_github/backoff_strategies.py +51 -0
  4. airbyte_source_github-1.8.0/source_github/errors_handlers.py +126 -0
  5. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/source.py +4 -2
  6. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/streams.py +28 -77
  7. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/README.md +0 -0
  8. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/__init__.py +0 -0
  9. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/config_migrations.py +0 -0
  10. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/constants.py +0 -0
  11. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/github_schema.py +0 -0
  12. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/graphql.py +0 -0
  13. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/run.py +0 -0
  14. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/assignees.json +0 -0
  15. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/branches.json +0 -0
  16. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/collaborators.json +0 -0
  17. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/comments.json +0 -0
  18. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/commit_comment_reactions.json +0 -0
  19. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/commit_comments.json +0 -0
  20. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/commits.json +0 -0
  21. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/contributor_activity.json +0 -0
  22. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/deployments.json +0 -0
  23. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/events.json +0 -0
  24. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_comment_reactions.json +0 -0
  25. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_events.json +0 -0
  26. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_labels.json +0 -0
  27. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_milestones.json +0 -0
  28. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_reactions.json +0 -0
  29. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issue_timeline_events.json +0 -0
  30. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/issues.json +0 -0
  31. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/organizations.json +0 -0
  32. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/project_cards.json +0 -0
  33. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/project_columns.json +0 -0
  34. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/projects.json +0 -0
  35. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/projects_v2.json +0 -0
  36. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  37. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_commits.json +0 -0
  38. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_stats.json +0 -0
  39. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/pull_requests.json +0 -0
  40. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/releases.json +0 -0
  41. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/repositories.json +0 -0
  42. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/review_comments.json +0 -0
  43. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/reviews.json +0 -0
  44. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/comment.json +0 -0
  45. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/commented.json +0 -0
  46. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/committed.json +0 -0
  47. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/cross_referenced.json +0 -0
  48. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/reviewed.json +0 -0
  49. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/reaction.json +0 -0
  50. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/reactions.json +0 -0
  51. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/user.json +0 -0
  52. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/shared/user_graphql.json +0 -0
  53. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/stargazers.json +0 -0
  54. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/tags.json +0 -0
  55. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/team_members.json +0 -0
  56. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/team_memberships.json +0 -0
  57. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/teams.json +0 -0
  58. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/users.json +0 -0
  59. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/workflow_jobs.json +0 -0
  60. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/workflow_runs.json +0 -0
  61. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/schemas/workflows.json +0 -0
  62. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/spec.json +0 -0
  63. {airbyte_source_github-1.7.13 → airbyte_source_github-1.8.0}/source_github/utils.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: airbyte-source-github
- Version: 1.7.13
+ Version: 1.8.0
  Summary: Source implementation for GitHub.
  Home-page: https://airbyte.com
  License: MIT
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: airbyte-cdk (==0.90.0)
+ Requires-Dist: airbyte-cdk (>=3,<4)
  Requires-Dist: sgqlc (==16.3)
  Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
  Project-URL: Repository, https://github.com/airbytehq/airbyte
@@ -5,7 +5,7 @@ requires = [
  build-backend = "poetry.core.masonry.api"

  [tool.poetry]
- version = "1.7.13"
+ version = "1.8.0"
  name = "airbyte-source-github"
  description = "Source implementation for GitHub."
  authors = [
@@ -22,7 +22,7 @@ packages = [

  [tool.poetry.dependencies]
  python = "^3.9,<3.12"
- airbyte-cdk = "0.90.0"
+ airbyte-cdk = "^3"
  sgqlc = "==16.3"

  [tool.poetry.scripts]
@@ -0,0 +1,51 @@
+ #
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ #
+
+ import time
+ from typing import Any, Optional, Union
+
+ import requests
+ from airbyte_cdk import BackoffStrategy
+ from airbyte_cdk.sources.streams.http import HttpStream
+
+
+ class GithubStreamABCBackoffStrategy(BackoffStrategy):
+     def __init__(self, stream: HttpStream, **kwargs):  # type: ignore  # noqa
+         self.stream = stream
+         super().__init__(**kwargs)
+
+     def backoff_time(
+         self, response_or_exception: Optional[Union[requests.Response, requests.RequestException]], **kwargs: Any
+     ) -> Optional[float]:
+         # This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
+         # `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
+         # we again could have 5000 per another hour.
+         if isinstance(response_or_exception, requests.Response):
+             min_backoff_time = 60.0
+             retry_after = response_or_exception.headers.get("Retry-After")
+             if retry_after is not None:
+                 backoff_time_in_seconds = max(float(retry_after), min_backoff_time)
+                 return self.get_waiting_time(backoff_time_in_seconds)
+
+             reset_time = response_or_exception.headers.get("X-RateLimit-Reset")
+             if reset_time:
+                 backoff_time_in_seconds = max(float(reset_time) - time.time(), min_backoff_time)
+                 return self.get_waiting_time(backoff_time_in_seconds)
+         return None
+
+     def get_waiting_time(self, backoff_time_in_seconds: Optional[float]) -> Optional[float]:
+         if backoff_time_in_seconds < 60 * 10:  # type: ignore[operator]
+             return backoff_time_in_seconds
+         else:
+             self.stream._http_client._session.auth.update_token()  # New token will be used in next request
+             return 1
+
+
+ class ContributorActivityBackoffStrategy(BackoffStrategy):
+     def backoff_time(
+         self, response_or_exception: Optional[Union[requests.Response, requests.RequestException]], **kwargs: Any
+     ) -> Optional[float]:
+         if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.ACCEPTED:
+             return 90
+         return None
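The two strategies added above encode GitHub's documented wait rules: honor `Retry-After`, otherwise wait until `X-RateLimit-Reset` (never less than 60 seconds), and rotate the token instead of sleeping when the wait would exceed ten minutes; the statistics endpoints get a flat 90-second wait on a 202. A minimal, illustrative sketch of the values they return (not part of the package; the hand-built responses and the `stream=None` stand-in are assumptions that only hold because the token-rotation branch is never reached):

import requests

from source_github.backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy

# GitHub's secondary rate limit answer: Retry-After wins, floored at 60 seconds.
rate_limited = requests.Response()
rate_limited.status_code = 403
rate_limited.headers["Retry-After"] = "120"
# stream=None is safe only because the wait stays under ten minutes,
# so the token-rotation branch that touches the stream is not exercised.
print(GithubStreamABCBackoffStrategy(stream=None).backoff_time(rate_limited))  # 120.0

# Statistics endpoints answer 202 Accepted while GitHub computes the data.
not_ready = requests.Response()
not_ready.status_code = 202
print(ContributorActivityBackoffStrategy().backoff_time(not_ready))  # 90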
@@ -0,0 +1,126 @@
+ #
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ #
+
+ from typing import Optional, Union
+
+ import requests
+ from airbyte_cdk.sources.streams.http import HttpStream
+ from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
+ from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
+ from airbyte_protocol.models import FailureType
+
+ from . import constants
+
+ GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
+     401: ErrorResolution(
+         response_action=ResponseAction.RETRY,
+         failure_type=FailureType.config_error,
+         error_message="Conflict.",
+     ),
+     403: ErrorResolution(
+         response_action=ResponseAction.RETRY,
+         failure_type=FailureType.config_error,
+         error_message="Conflict.",
+     ),
+     404: ErrorResolution(
+         response_action=ResponseAction.RETRY,
+         failure_type=FailureType.config_error,
+         error_message="Conflict.",
+     ),
+     409: ErrorResolution(
+         response_action=ResponseAction.RETRY,
+         failure_type=FailureType.config_error,
+         error_message="Conflict.",
+     ),
+     410: ErrorResolution(
+         response_action=ResponseAction.RETRY,
+         failure_type=FailureType.config_error,
+         error_message="Gone. Please ensure the url is valid.",
+     ),
+ }
+
+
+ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
+     def __init__(self, stream: HttpStream, **kwargs):  # type: ignore  # noqa
+         self.stream = stream
+         super().__init__(**kwargs)
+
+     def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
+         if isinstance(response_or_exception, requests.Response):
+             retry_flag = (
+                 # The GitHub GraphQL API has limitations
+                 # https://docs.github.com/en/graphql/overview/resource-limitations
+                 (
+                     response_or_exception.headers.get("X-RateLimit-Resource") == "graphql"
+                     and self.stream.check_graphql_rate_limited(response_or_exception.json())
+                 )
+                 # Rate limit HTTP headers
+                 # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
+                 or (response_or_exception.status_code != 200 and response_or_exception.headers.get("X-RateLimit-Remaining") == "0")
+                 # Secondary rate limits
+                 # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
+                 or "Retry-After" in response_or_exception.headers
+             )
+             if retry_flag:
+                 headers = [
+                     "X-RateLimit-Resource",
+                     "X-RateLimit-Remaining",
+                     "X-RateLimit-Reset",
+                     "X-RateLimit-Limit",
+                     "X-RateLimit-Used",
+                     "Retry-After",
+                 ]
+                 string_headers = ", ".join(
+                     [f"{h}: {response_or_exception.headers[h]}" for h in headers if h in response_or_exception.headers]
+                 )
+                 if string_headers:
+                     string_headers = f"HTTP headers: {string_headers},"
+
+                 self._logger.info(
+                     f"Rate limit handling for stream `{self.stream.name}` for the response with {response_or_exception.status_code} status code, {string_headers} with message: {response_or_exception.text}"
+                 )
+                 return ErrorResolution(
+                     response_action=ResponseAction.RATE_LIMITED,
+                     failure_type=FailureType.transient_error,
+                     error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
+                 )
+
+         return super().interpret_response(response_or_exception)
+
+
+ class ContributorActivityErrorHandler(HttpStatusErrorHandler):
+     def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
+         if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.ACCEPTED:
+             return ErrorResolution(
+                 response_action=ResponseAction.RETRY,
+                 failure_type=FailureType.transient_error,
+                 error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
+             )
+
+         return super().interpret_response(response_or_exception)
+
+
+ class GitHubGraphQLErrorHandler(GithubStreamABCErrorHandler):
+     def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
+         if isinstance(response_or_exception, requests.Response):
+             if response_or_exception.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT):
+                 self.stream.page_size = int(self.stream.page_size / 2)
+                 return ErrorResolution(
+                     response_action=ResponseAction.RETRY,
+                     failure_type=FailureType.transient_error,
+                     error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
+                 )
+
+             self.stream.page_size = (
+                 constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.stream.large_stream else constants.DEFAULT_PAGE_SIZE
+             )
+
+             if response_or_exception.json().get("errors"):
+                 return ErrorResolution(
+                     response_action=ResponseAction.RETRY,
+                     failure_type=FailureType.transient_error,
+                     error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
+                 )
+
+         return super().interpret_response(response_or_exception)
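The handlers in the new errors_handlers.py above replace the old `should_retry()` logic: `GithubStreamABCErrorHandler` classifies primary and secondary rate-limit responses as `RATE_LIMITED`, the contributor-activity handler retries 202s, and the GraphQL handler additionally halves `page_size` on 502/504 and retries payloads carrying an `errors` key. A hedged sketch of how a resolution is produced (stand-in objects only; `fake_stream` is an assumption carrying just the two attributes the handler touches here):

import logging
from types import SimpleNamespace

import requests

from source_github.errors_handlers import GITHUB_DEFAULT_ERROR_MAPPING, GithubStreamABCErrorHandler

# Stand-in for a stream: only .name and .check_graphql_rate_limited are used on this path.
fake_stream = SimpleNamespace(name="repositories", check_graphql_rate_limited=lambda payload: False)

handler = GithubStreamABCErrorHandler(
    logger=logging.getLogger("airbyte"),
    max_retries=5,
    error_mapping=GITHUB_DEFAULT_ERROR_MAPPING,
    stream=fake_stream,
)

# A 403 with the primary quota exhausted is classified as rate limited.
response = requests.Response()
response.status_code = 403
response.headers["X-RateLimit-Remaining"] = "0"

resolution = handler.interpret_response(response)
print(resolution.response_action)  # ResponseAction.RATE_LIMITED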
@@ -65,7 +65,7 @@ class SourceGithub(AbstractSource):

      @staticmethod
      def _get_org_repositories(
-         config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator
+         config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator, is_check_connection: bool = False
      ) -> Tuple[List[str], List[str], Optional[str]]:
          """
          Parse config/repositories and produce two lists: organizations, repositories.
@@ -92,6 +92,7 @@ class SourceGithub(AbstractSource):
              org_names = [org.split("/")[0] for org in unchecked_orgs]
              pattern = "|".join([f"({org.replace('*', '.*')})" for org in unchecked_orgs])
              stream = Repositories(authenticator=authenticator, organizations=org_names, api_url=config.get("api_url"), pattern=pattern)
+             stream.exit_on_rate_limit = True if is_check_connection else False
              for record in read_full_refresh(stream):
                  repositories.add(record["full_name"])
                  organizations.add(record["organization"])
@@ -105,6 +106,7 @@ class SourceGithub(AbstractSource):
                  # This parameter is deprecated and in future will be used sane default, page_size: 10
                  page_size_for_large_streams=config.get("page_size_for_large_streams", constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM),
              )
+             stream.exit_on_rate_limit = True if is_check_connection else False
              for record in read_full_refresh(stream):
                  repositories.add(record["full_name"])
                  organization = record.get("organization", {}).get("login")
@@ -194,7 +196,7 @@ class SourceGithub(AbstractSource):
          config = self._validate_and_transform_config(config)
          try:
              authenticator = self._get_authenticator(config)
-             _, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator)
+             _, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator, is_check_connection=True)
              if not repositories:
                  return (
                      False,
@@ -3,24 +3,31 @@
  #

  import re
- import time
  from abc import ABC, abstractmethod
- from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
+ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
  from urllib import parse

  import pendulum
  import requests
+ from airbyte_cdk import BackoffStrategy
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
  from airbyte_cdk.models import Type as MessageType
  from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
- from airbyte_cdk.sources.streams.core import CheckpointMixin
+ from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
  from airbyte_cdk.sources.streams.http import HttpStream
- from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException
+ from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
+ from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
  from airbyte_cdk.utils import AirbyteTracedException
  from airbyte_protocol.models import FailureType
- from requests.exceptions import HTTPError

  from . import constants
+ from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
+ from .errors_handlers import (
+     GITHUB_DEFAULT_ERROR_MAPPING,
+     ContributorActivityErrorHandler,
+     GitHubGraphQLErrorHandler,
+     GithubStreamABCErrorHandler,
+ )
  from .graphql import (
      CursorStorage,
      QueryReactions,
@@ -38,7 +45,7 @@ class GithubStreamABC(HttpStream, ABC):

      # Detect streams with high API load
      large_stream = False
-
+     max_retries: int = 5
      stream_base_params = {}

      def __init__(self, api_url: str = "https://api.github.com", access_token_type: str = "", **kwargs):
@@ -93,62 +100,13 @@ class GithubStreamABC(HttpStream, ABC):
          for record in response.json():  # GitHub puts records in an array.
              yield self.transform(record=record, stream_slice=stream_slice)

-     def should_retry(self, response: requests.Response) -> bool:
-         if super().should_retry(response):
-             return True
-
-         retry_flag = (
-             # The GitHub GraphQL API has limitations
-             # https://docs.github.com/en/graphql/overview/resource-limitations
-             (response.headers.get("X-RateLimit-Resource") == "graphql" and self.check_graphql_rate_limited(response.json()))
-             # Rate limit HTTP headers
-             # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
-             or (response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0")
-             # Secondary rate limits
-             # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
-             or "Retry-After" in response.headers
+     def get_error_handler(self) -> Optional[ErrorHandler]:
+         return GithubStreamABCErrorHandler(
+             logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING, stream=self
          )
-         if retry_flag:
-             headers = [
-                 "X-RateLimit-Resource",
-                 "X-RateLimit-Remaining",
-                 "X-RateLimit-Reset",
-                 "X-RateLimit-Limit",
-                 "X-RateLimit-Used",
-                 "Retry-After",
-             ]
-             headers = ", ".join([f"{h}: {response.headers[h]}" for h in headers if h in response.headers])
-             if headers:
-                 headers = f"HTTP headers: {headers},"
-
-             self.logger.info(
-                 f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}"
-             )
-
-         return retry_flag
-
-     def backoff_time(self, response: requests.Response) -> Optional[float]:
-         # This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
-         # `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
-         # we again could have 5000 per another hour.
-
-         min_backoff_time = 60.0
-         retry_after = response.headers.get("Retry-After")
-         if retry_after is not None:
-             backoff_time_in_seconds = max(float(retry_after), min_backoff_time)
-             return self.get_waiting_time(backoff_time_in_seconds)

-         reset_time = response.headers.get("X-RateLimit-Reset")
-         if reset_time:
-             backoff_time_in_seconds = max(float(reset_time) - time.time(), min_backoff_time)
-             return self.get_waiting_time(backoff_time_in_seconds)
-
-     def get_waiting_time(self, backoff_time_in_seconds):
-         if backoff_time_in_seconds < self.max_time:
-             return backoff_time_in_seconds
-         else:
-             self._session.auth.update_token()  # New token will be used in next request
-             return 1
+     def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
+         return GithubStreamABCBackoffStrategy(stream=self)

      @staticmethod
      def check_graphql_rate_limited(response_json: dict) -> bool:
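As the hunk above shows, streams no longer override `should_retry()`/`backoff_time()`; they hand the CDK an error handler and a backoff strategy through the `get_error_handler()` and `get_backoff_strategy()` hooks. A hedged sketch of that wiring on a made-up stream (the class name, endpoint, and fields are illustrative assumptions, not connector code):

from typing import Any, Iterable, List, Mapping, Optional, Union

import requests
from airbyte_cdk import BackoffStrategy
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler

from source_github.backoff_strategies import GithubStreamABCBackoffStrategy
from source_github.errors_handlers import GITHUB_DEFAULT_ERROR_MAPPING, GithubStreamABCErrorHandler


class ExampleLabelsStream(HttpStream):  # hypothetical stream, for illustration only
    url_base = "https://api.github.com/"
    primary_key = "id"
    max_retries = 5

    def path(self, **kwargs) -> str:
        return "repos/airbytehq/airbyte/labels"

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        return None  # pagination omitted in this sketch

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping[str, Any]]:
        yield from response.json()

    @staticmethod
    def check_graphql_rate_limited(response_json: dict) -> bool:
        return False  # a plain REST stream never hits the GraphQL resource limits

    def get_error_handler(self) -> Optional[ErrorHandler]:
        # The CDK consults this instead of the removed should_retry() override.
        return GithubStreamABCErrorHandler(
            logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING, stream=self
        )

    def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
        # ...and this instead of the removed backoff_time() override.
        return GithubStreamABCBackoffStrategy(stream=self)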
@@ -166,7 +124,7 @@ class GithubStreamABC(HttpStream, ABC):
          # Reading records while handling the errors
          try:
              yield from super().read_records(stream_slice=stream_slice, **kwargs)
-         except HTTPError as e:
+         except DefaultBackoffException as e:
              # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
              # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
              # Bocked on https://github.com/airbytehq/airbyte/issues/3514.
@@ -802,12 +760,10 @@ class GitHubGraphQLStream(GithubStream, ABC):
      ) -> str:
          return "graphql"

-     def should_retry(self, response: requests.Response) -> bool:
-         if response.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT):
-             self.page_size = int(self.page_size / 2)
-             return True
-         self.page_size = constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.large_stream else constants.DEFAULT_PAGE_SIZE
-         return super().should_retry(response) or response.json().get("errors")
+     def get_error_handler(self) -> Optional[ErrorHandler]:
+         return GitHubGraphQLErrorHandler(
+             logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING, stream=self
+         )

      def _get_repository_name(self, repository: Mapping[str, Any]) -> str:
          return repository["owner"]["login"] + "/" + repository["name"]
@@ -1656,16 +1612,11 @@ class ContributorActivity(GithubStream):
          record.update(record.pop("author"))
          return record

-     def should_retry(self, response: requests.Response) -> bool:
-         """
-         If the data hasn't been cached when you query a repository's statistics, you'll receive a 202 response, need to retry to get results
-         see for more info https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#a-word-about-caching
-         """
-         if super().should_retry(response) or response.status_code == requests.codes.ACCEPTED:
-             return True
+     def get_error_handler(self) -> Optional[ErrorHandler]:
+         return ContributorActivityErrorHandler(logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING)

-     def backoff_time(self, response: requests.Response) -> Optional[float]:
-         return 90 if response.status_code == requests.codes.ACCEPTED else super().backoff_time(response)
+     def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
+         return ContributorActivityBackoffStrategy()

      def parse_response(
          self,
@@ -1685,7 +1636,7 @@ class ContributorActivity(GithubStream):
          repository = stream_slice.get("repository", "")
          try:
              yield from super().read_records(stream_slice=stream_slice, **kwargs)
-         except HTTPError as e:
+         except UserDefinedBackoffException as e:
              if e.response.status_code == requests.codes.ACCEPTED:
                  yield AirbyteMessage(
                      type=MessageType.LOG,