airbyte-source-github 1.7.12.dev202407111311__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/PKG-INFO +2 -2
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/pyproject.toml +2 -2
- airbyte_source_github-1.8.0/source_github/backoff_strategies.py +51 -0
- airbyte_source_github-1.8.0/source_github/errors_handlers.py +126 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/source.py +4 -2
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/streams.py +28 -77
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/README.md +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/__init__.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/config_migrations.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/constants.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/github_schema.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/graphql.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/run.py +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/assignees.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/branches.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/collaborators.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/comments.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/commit_comment_reactions.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/commit_comments.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/commits.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/contributor_activity.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/deployments.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/events.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_comment_reactions.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_events.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_labels.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_milestones.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_reactions.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issue_timeline_events.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/issues.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/organizations.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/project_cards.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/project_columns.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/projects.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/projects_v2.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_comment_reactions.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_commits.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/pull_request_stats.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/pull_requests.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/releases.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/repositories.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/review_comments.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/reviews.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/comment.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/commented.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/committed.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/cross_referenced.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/events/reviewed.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/reaction.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/reactions.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/user.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/shared/user_graphql.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/stargazers.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/tags.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/team_members.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/team_memberships.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/teams.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/users.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/workflow_jobs.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/workflow_runs.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/schemas/workflows.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/spec.json +0 -0
- {airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-source-github
|
3
|
-
Version: 1.7.12.dev202407111311
|
3
|
+
Version: 1.8.0
|
4
4
|
Summary: Source implementation for GitHub.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.9
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
15
|
-
Requires-Dist: airbyte-cdk (
|
15
|
+
Requires-Dist: airbyte-cdk (>=3,<4)
|
16
16
|
Requires-Dist: sgqlc (==16.3)
|
17
17
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
|
18
18
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
@@ -5,7 +5,7 @@ requires = [
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
6
6
|
|
7
7
|
[tool.poetry]
|
8
|
-
version = "1.7.12.dev202407111311"
|
8
|
+
version = "1.8.0"
|
9
9
|
name = "airbyte-source-github"
|
10
10
|
description = "Source implementation for GitHub."
|
11
11
|
authors = [
|
@@ -22,7 +22,7 @@ packages = [
|
|
22
22
|
|
23
23
|
[tool.poetry.dependencies]
|
24
24
|
python = "^3.9,<3.12"
|
25
|
-
airbyte-cdk = "
|
25
|
+
airbyte-cdk = "^3"
|
26
26
|
sgqlc = "==16.3"
|
27
27
|
|
28
28
|
[tool.poetry.scripts]
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import time
|
6
|
+
from typing import Any, Optional, Union
|
7
|
+
|
8
|
+
import requests
|
9
|
+
from airbyte_cdk import BackoffStrategy
|
10
|
+
from airbyte_cdk.sources.streams.http import HttpStream
|
11
|
+
|
12
|
+
|
13
|
+
class GithubStreamABCBackoffStrategy(BackoffStrategy):
    """Derive retry delays for GitHub streams from a response's rate-limit headers.

    ``Retry-After`` takes precedence over ``X-RateLimit-Reset``. Every
    header-derived delay is at least ``MIN_BACKOFF_TIME_IN_SECONDS``. A delay of
    ``MAX_WAIT_TIME_IN_SECONDS`` or more is not waited out: the authenticator is
    asked to update its token and a one-second delay is returned instead
    (see ``get_waiting_time``).
    """

    # Lower bound applied to any delay computed from response headers.
    MIN_BACKOFF_TIME_IN_SECONDS = 60.0
    # Delays at or above this threshold trigger a token update instead of a long sleep.
    MAX_WAIT_TIME_IN_SECONDS = 60 * 10

    def __init__(self, stream: HttpStream, **kwargs):  # type: ignore # noqa
        """Keep a reference to ``stream`` and forward remaining kwargs to the base class."""
        self.stream = stream
        super().__init__(**kwargs)

    @staticmethod
    def _header_as_float(raw_value: Any) -> Optional[float]:
        """Return ``raw_value`` as a float, or ``None`` when it is not numeric."""
        try:
            return float(raw_value)
        except (TypeError, ValueError):
            return None

    def backoff_time(
        self, response_or_exception: Optional[Union[requests.Response, requests.RequestException]], **kwargs: Any
    ) -> Optional[float]:
        """Return the number of seconds to wait before retrying, or ``None``.

        ``None`` is returned when the argument is not a ``requests.Response`` or when
        neither ``Retry-After`` nor ``X-RateLimit-Reset`` is present.
        """
        # This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
        # `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
        # we again could have 5000 per another hour.
        if not isinstance(response_or_exception, requests.Response):
            return None

        headers = response_or_exception.headers
        min_backoff_time = self.MIN_BACKOFF_TIME_IN_SECONDS

        retry_after = headers.get("Retry-After")
        if retry_after is not None:
            seconds = self._header_as_float(retry_after)
            if seconds is None:
                # A non-numeric Retry-After (e.g. an HTTP-date) used to raise ValueError;
                # fall back to the minimum delay instead of failing the request.
                seconds = min_backoff_time
            return self.get_waiting_time(max(seconds, min_backoff_time))

        reset_time = headers.get("X-RateLimit-Reset")
        if reset_time:
            reset_epoch = self._header_as_float(reset_time)
            # The header is compared against time.time(), i.e. treated as epoch seconds.
            remaining = min_backoff_time if reset_epoch is None else reset_epoch - time.time()
            return self.get_waiting_time(max(remaining, min_backoff_time))

        return None

    def get_waiting_time(self, backoff_time_in_seconds: Optional[float]) -> Optional[float]:
        """Cap the wait: return short delays unchanged, swap long ones for a token update.

        ``None`` is passed through unchanged so the declared ``Optional`` parameter
        cannot raise ``TypeError`` on comparison.
        """
        if backoff_time_in_seconds is None:
            return None
        if backoff_time_in_seconds < self.MAX_WAIT_TIME_IN_SECONDS:
            return backoff_time_in_seconds
        self.stream._http_client._session.auth.update_token()  # New token will be used in next request
        return 1
|
43
|
+
|
44
|
+
|
45
|
+
class ContributorActivityBackoffStrategy(BackoffStrategy):
    """Fixed-delay backoff used when a response comes back with status 202 (Accepted)."""

    def backoff_time(
        self, response_or_exception: Optional[Union[requests.Response, requests.RequestException]], **kwargs: Any
    ) -> Optional[float]:
        """Return 90 seconds for a 202 response; ``None`` for anything else."""
        if not isinstance(response_or_exception, requests.Response):
            return None
        if response_or_exception.status_code != requests.codes.ACCEPTED:
            return None
        return 90
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Optional, Union
|
6
|
+
|
7
|
+
import requests
|
8
|
+
from airbyte_cdk.sources.streams.http import HttpStream
|
9
|
+
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
|
10
|
+
from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
|
11
|
+
from airbyte_protocol.models import FailureType
|
12
|
+
|
13
|
+
from . import constants
|
14
|
+
|
15
|
+
# Status codes that are retried and reported as config errors, with a message
# matching each code. Previously 401, 403 and 404 all reused the 409 text "Conflict.".
_GITHUB_RETRYABLE_CONFIG_ERROR_MESSAGES = {
    401: "Unauthorized. Please ensure you are authenticated correctly.",
    403: "Forbidden. You don't have permission to access this resource.",
    404: "Not found. The requested resource was not found on the server.",
    409: "Conflict.",
    410: "Gone. Please ensure the url is valid.",
}

# CDK defaults, overridden for the status codes above.
GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
    status_code: ErrorResolution(
        response_action=ResponseAction.RETRY,
        failure_type=FailureType.config_error,
        error_message=message,
    )
    for status_code, message in _GITHUB_RETRYABLE_CONFIG_ERROR_MESSAGES.items()
}
|
42
|
+
|
43
|
+
|
44
|
+
class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
    """Error handler that classifies GitHub rate-limit responses as ``RATE_LIMITED``.

    A response is treated as rate limited when any of the following holds:
    its ``X-RateLimit-Resource`` header is ``graphql`` and the stream's
    ``check_graphql_rate_limited`` accepts the JSON body; its status is not 200 and
    ``X-RateLimit-Remaining`` is ``"0"``; or it carries a ``Retry-After`` header.
    Everything else is delegated to ``HttpStatusErrorHandler``.
    """

    # Headers echoed into the log line when a rate limit is detected.
    _RATE_LIMIT_HEADERS = (
        "X-RateLimit-Resource",
        "X-RateLimit-Remaining",
        "X-RateLimit-Reset",
        "X-RateLimit-Limit",
        "X-RateLimit-Used",
        "Retry-After",
    )

    def __init__(self, stream: HttpStream, **kwargs):  # type: ignore # noqa
        """Keep a reference to ``stream`` and forward remaining kwargs to the base class."""
        self.stream = stream
        super().__init__(**kwargs)

    def _is_graphql_rate_limited(self, response: requests.Response) -> bool:
        """Check the GraphQL-specific rate-limit signal carried in the response body."""
        # The GitHub GraphQL API has limitations
        # https://docs.github.com/en/graphql/overview/resource-limitations
        if response.headers.get("X-RateLimit-Resource") != "graphql":
            return False
        try:
            payload = response.json()
        except ValueError:
            # A non-JSON body cannot carry the GraphQL rate-limit marker; previously
            # the decode error escaped from the handler.
            return False
        return bool(self.stream.check_graphql_rate_limited(payload))

    def _is_rate_limited(self, response: requests.Response) -> bool:
        """Return True when any of the three rate-limit signals is present."""
        if self._is_graphql_rate_limited(response):
            return True
        # Rate limit HTTP headers
        # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
        if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0":
            return True
        # Secondary rate limits
        # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
        return "Retry-After" in response.headers

    def _format_rate_limit_headers(self, response: requests.Response) -> str:
        """Render the rate-limit headers present on ``response`` for the log message."""
        string_headers = ", ".join(f"{h}: {response.headers[h]}" for h in self._RATE_LIMIT_HEADERS if h in response.headers)
        if string_headers:
            string_headers = f"HTTP headers: {string_headers},"
        return string_headers

    def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
        """Return a ``RATE_LIMITED`` resolution for rate-limited responses, else defer to the base class."""
        if isinstance(response_or_exception, requests.Response) and self._is_rate_limited(response_or_exception):
            string_headers = self._format_rate_limit_headers(response_or_exception)
            self._logger.info(
                f"Rate limit handling for stream `{self.stream.name}` for the response with {response_or_exception.status_code} status code, {string_headers} with message: {response_or_exception.text}"
            )
            return ErrorResolution(
                response_action=ResponseAction.RATE_LIMITED,
                failure_type=FailureType.transient_error,
                error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
            )

        return super().interpret_response(response_or_exception)
|
90
|
+
|
91
|
+
|
92
|
+
class ContributorActivityErrorHandler(HttpStatusErrorHandler):
    """Error handler that retries 202 (Accepted) responses as transient errors."""

    def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
        """Return a ``RETRY`` resolution for a 202 response; otherwise defer to the base class."""
        is_response = isinstance(response_or_exception, requests.Response)
        if not is_response or response_or_exception.status_code != requests.codes.ACCEPTED:
            return super().interpret_response(response_or_exception)

        status_code = response_or_exception.status_code
        return ErrorResolution(
            response_action=ResponseAction.RETRY,
            failure_type=FailureType.transient_error,
            error_message=f"Response status code: {status_code}. Retrying...",
        )
|
102
|
+
|
103
|
+
|
104
|
+
class GitHubGraphQLErrorHandler(GithubStreamABCErrorHandler):
    """Error handler for GraphQL streams that adapts the stream's page size.

    On 502/504 the stream's ``page_size`` is halved and the request is retried.
    On any other response the page size is reset to its default, and a body with a
    non-empty ``errors`` entry is retried. Everything else is delegated to
    ``GithubStreamABCErrorHandler``.
    """

    def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
        """Return a ``RETRY`` resolution for gateway errors and GraphQL ``errors``; else defer to the parent."""
        if isinstance(response_or_exception, requests.Response):
            if response_or_exception.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT):
                # Never shrink below 1: repeated halving (10 -> 5 -> 2 -> 1 -> 0)
                # would otherwise leave the stream with a page size of zero.
                self.stream.page_size = max(1, int(self.stream.page_size / 2))
                return ErrorResolution(
                    response_action=ResponseAction.RETRY,
                    failure_type=FailureType.transient_error,
                    error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
                )

            # Not a gateway error: restore the default page size.
            self.stream.page_size = (
                constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.stream.large_stream else constants.DEFAULT_PAGE_SIZE
            )

            try:
                payload = response_or_exception.json()
            except ValueError:
                # Non-JSON body (e.g. an HTML error page): no GraphQL errors to inspect.
                payload = None

            if isinstance(payload, dict) and payload.get("errors"):
                return ErrorResolution(
                    response_action=ResponseAction.RETRY,
                    failure_type=FailureType.transient_error,
                    error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
                )

        return super().interpret_response(response_or_exception)
|
{airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/source.py
RENAMED
@@ -65,7 +65,7 @@ class SourceGithub(AbstractSource):
|
|
65
65
|
|
66
66
|
@staticmethod
|
67
67
|
def _get_org_repositories(
|
68
|
-
config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator
|
68
|
+
config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator, is_check_connection: bool = False
|
69
69
|
) -> Tuple[List[str], List[str], Optional[str]]:
|
70
70
|
"""
|
71
71
|
Parse config/repositories and produce two lists: organizations, repositories.
|
@@ -92,6 +92,7 @@ class SourceGithub(AbstractSource):
|
|
92
92
|
org_names = [org.split("/")[0] for org in unchecked_orgs]
|
93
93
|
pattern = "|".join([f"({org.replace('*', '.*')})" for org in unchecked_orgs])
|
94
94
|
stream = Repositories(authenticator=authenticator, organizations=org_names, api_url=config.get("api_url"), pattern=pattern)
|
95
|
+
stream.exit_on_rate_limit = True if is_check_connection else False
|
95
96
|
for record in read_full_refresh(stream):
|
96
97
|
repositories.add(record["full_name"])
|
97
98
|
organizations.add(record["organization"])
|
@@ -105,6 +106,7 @@ class SourceGithub(AbstractSource):
|
|
105
106
|
# This parameter is deprecated and in future will be used sane default, page_size: 10
|
106
107
|
page_size_for_large_streams=config.get("page_size_for_large_streams", constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM),
|
107
108
|
)
|
109
|
+
stream.exit_on_rate_limit = True if is_check_connection else False
|
108
110
|
for record in read_full_refresh(stream):
|
109
111
|
repositories.add(record["full_name"])
|
110
112
|
organization = record.get("organization", {}).get("login")
|
@@ -194,7 +196,7 @@ class SourceGithub(AbstractSource):
|
|
194
196
|
config = self._validate_and_transform_config(config)
|
195
197
|
try:
|
196
198
|
authenticator = self._get_authenticator(config)
|
197
|
-
_, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator)
|
199
|
+
_, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator, is_check_connection=True)
|
198
200
|
if not repositories:
|
199
201
|
return (
|
200
202
|
False,
|
@@ -3,24 +3,31 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import re
|
6
|
-
import time
|
7
6
|
from abc import ABC, abstractmethod
|
8
|
-
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
|
7
|
+
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
8
|
from urllib import parse
|
10
9
|
|
11
10
|
import pendulum
|
12
11
|
import requests
|
12
|
+
from airbyte_cdk import BackoffStrategy
|
13
13
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
|
14
14
|
from airbyte_cdk.models import Type as MessageType
|
15
15
|
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
16
|
-
from airbyte_cdk.sources.streams.core import CheckpointMixin
|
16
|
+
from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
|
17
17
|
from airbyte_cdk.sources.streams.http import HttpStream
|
18
|
-
from airbyte_cdk.sources.streams.http.
|
18
|
+
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
|
19
|
+
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
|
19
20
|
from airbyte_cdk.utils import AirbyteTracedException
|
20
21
|
from airbyte_protocol.models import FailureType
|
21
|
-
from requests.exceptions import HTTPError
|
22
22
|
|
23
23
|
from . import constants
|
24
|
+
from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
|
25
|
+
from .errors_handlers import (
|
26
|
+
GITHUB_DEFAULT_ERROR_MAPPING,
|
27
|
+
ContributorActivityErrorHandler,
|
28
|
+
GitHubGraphQLErrorHandler,
|
29
|
+
GithubStreamABCErrorHandler,
|
30
|
+
)
|
24
31
|
from .graphql import (
|
25
32
|
CursorStorage,
|
26
33
|
QueryReactions,
|
@@ -38,7 +45,7 @@ class GithubStreamABC(HttpStream, ABC):
|
|
38
45
|
|
39
46
|
# Detect streams with high API load
|
40
47
|
large_stream = False
|
41
|
-
|
48
|
+
max_retries: int = 5
|
42
49
|
stream_base_params = {}
|
43
50
|
|
44
51
|
def __init__(self, api_url: str = "https://api.github.com", access_token_type: str = "", **kwargs):
|
@@ -93,62 +100,13 @@ class GithubStreamABC(HttpStream, ABC):
|
|
93
100
|
for record in response.json(): # GitHub puts records in an array.
|
94
101
|
yield self.transform(record=record, stream_slice=stream_slice)
|
95
102
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
retry_flag = (
|
101
|
-
# The GitHub GraphQL API has limitations
|
102
|
-
# https://docs.github.com/en/graphql/overview/resource-limitations
|
103
|
-
(response.headers.get("X-RateLimit-Resource") == "graphql" and self.check_graphql_rate_limited(response.json()))
|
104
|
-
# Rate limit HTTP headers
|
105
|
-
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
|
106
|
-
or (response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0")
|
107
|
-
# Secondary rate limits
|
108
|
-
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
|
109
|
-
or "Retry-After" in response.headers
|
103
|
+
def get_error_handler(self) -> Optional[ErrorHandler]:
|
104
|
+
return GithubStreamABCErrorHandler(
|
105
|
+
logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING, stream=self
|
110
106
|
)
|
111
|
-
if retry_flag:
|
112
|
-
headers = [
|
113
|
-
"X-RateLimit-Resource",
|
114
|
-
"X-RateLimit-Remaining",
|
115
|
-
"X-RateLimit-Reset",
|
116
|
-
"X-RateLimit-Limit",
|
117
|
-
"X-RateLimit-Used",
|
118
|
-
"Retry-After",
|
119
|
-
]
|
120
|
-
headers = ", ".join([f"{h}: {response.headers[h]}" for h in headers if h in response.headers])
|
121
|
-
if headers:
|
122
|
-
headers = f"HTTP headers: {headers},"
|
123
|
-
|
124
|
-
self.logger.info(
|
125
|
-
f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}"
|
126
|
-
)
|
127
|
-
|
128
|
-
return retry_flag
|
129
|
-
|
130
|
-
def backoff_time(self, response: requests.Response) -> Optional[float]:
|
131
|
-
# This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
|
132
|
-
# `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
|
133
|
-
# we again could have 5000 per another hour.
|
134
|
-
|
135
|
-
min_backoff_time = 60.0
|
136
|
-
retry_after = response.headers.get("Retry-After")
|
137
|
-
if retry_after is not None:
|
138
|
-
backoff_time_in_seconds = max(float(retry_after), min_backoff_time)
|
139
|
-
return self.get_waiting_time(backoff_time_in_seconds)
|
140
107
|
|
141
|
-
|
142
|
-
|
143
|
-
backoff_time_in_seconds = max(float(reset_time) - time.time(), min_backoff_time)
|
144
|
-
return self.get_waiting_time(backoff_time_in_seconds)
|
145
|
-
|
146
|
-
def get_waiting_time(self, backoff_time_in_seconds):
|
147
|
-
if backoff_time_in_seconds < self.max_time:
|
148
|
-
return backoff_time_in_seconds
|
149
|
-
else:
|
150
|
-
self._session.auth.update_token() # New token will be used in next request
|
151
|
-
return 1
|
108
|
+
def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
|
109
|
+
return GithubStreamABCBackoffStrategy(stream=self)
|
152
110
|
|
153
111
|
@staticmethod
|
154
112
|
def check_graphql_rate_limited(response_json: dict) -> bool:
|
@@ -166,7 +124,7 @@ class GithubStreamABC(HttpStream, ABC):
|
|
166
124
|
# Reading records while handling the errors
|
167
125
|
try:
|
168
126
|
yield from super().read_records(stream_slice=stream_slice, **kwargs)
|
169
|
-
except
|
127
|
+
except DefaultBackoffException as e:
|
170
128
|
# This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
|
171
129
|
# function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
|
172
130
|
# Blocked on https://github.com/airbytehq/airbyte/issues/3514.
|
@@ -802,12 +760,10 @@ class GitHubGraphQLStream(GithubStream, ABC):
|
|
802
760
|
) -> str:
|
803
761
|
return "graphql"
|
804
762
|
|
805
|
-
def
|
806
|
-
|
807
|
-
self.
|
808
|
-
|
809
|
-
self.page_size = constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.large_stream else constants.DEFAULT_PAGE_SIZE
|
810
|
-
return super().should_retry(response) or response.json().get("errors")
|
763
|
+
def get_error_handler(self) -> Optional[ErrorHandler]:
|
764
|
+
return GitHubGraphQLErrorHandler(
|
765
|
+
logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING, stream=self
|
766
|
+
)
|
811
767
|
|
812
768
|
def _get_repository_name(self, repository: Mapping[str, Any]) -> str:
|
813
769
|
return repository["owner"]["login"] + "/" + repository["name"]
|
@@ -1656,16 +1612,11 @@ class ContributorActivity(GithubStream):
|
|
1656
1612
|
record.update(record.pop("author"))
|
1657
1613
|
return record
|
1658
1614
|
|
1659
|
-
def
|
1660
|
-
|
1661
|
-
If the data hasn't been cached when you query a repository's statistics, you'll receive a 202 response, need to retry to get results
|
1662
|
-
see for more info https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#a-word-about-caching
|
1663
|
-
"""
|
1664
|
-
if super().should_retry(response) or response.status_code == requests.codes.ACCEPTED:
|
1665
|
-
return True
|
1615
|
+
def get_error_handler(self) -> Optional[ErrorHandler]:
|
1616
|
+
return ContributorActivityErrorHandler(logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING)
|
1666
1617
|
|
1667
|
-
def
|
1668
|
-
return
|
1618
|
+
def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
|
1619
|
+
return ContributorActivityBackoffStrategy()
|
1669
1620
|
|
1670
1621
|
def parse_response(
|
1671
1622
|
self,
|
@@ -1685,7 +1636,7 @@ class ContributorActivity(GithubStream):
|
|
1685
1636
|
repository = stream_slice.get("repository", "")
|
1686
1637
|
try:
|
1687
1638
|
yield from super().read_records(stream_slice=stream_slice, **kwargs)
|
1688
|
-
except
|
1639
|
+
except UserDefinedBackoffException as e:
|
1689
1640
|
if e.response.status_code == requests.codes.ACCEPTED:
|
1690
1641
|
yield AirbyteMessage(
|
1691
1642
|
type=MessageType.LOG,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/run.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/spec.json
RENAMED
File without changes
|
{airbyte_source_github-1.7.12.dev202407111311 → airbyte_source_github-1.8.0}/source_github/utils.py
RENAMED
File without changes
|