airbyte-source-github 1.8.39__tar.gz → 2.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/PKG-INFO +7 -5
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/pyproject.toml +5 -5
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/__init__.py +1 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/config_migrations.py +4 -1
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/errors_handlers.py +1 -1
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/comment.json +2 -2
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/commented.json +2 -2
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/cross_referenced.json +4 -4
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reactions.json +2 -2
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/source.py +5 -1
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/streams.py +47 -31
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/utils.py +48 -16
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/README.md +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/backoff_strategies.py +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/constants.py +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/github_schema.py +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/graphql.py +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/run.py +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/assignees.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/branches.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/collaborators.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/comments.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comment_reactions.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comments.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commits.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/contributor_activity.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/deployments.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/events.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_comment_reactions.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_events.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_labels.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_milestones.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_reactions.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_timeline_events.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issues.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/organizations.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/project_cards.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/project_columns.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects_v2.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_comment_reactions.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_commits.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_stats.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_requests.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/releases.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/repositories.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/review_comments.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/reviews.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/committed.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/reviewed.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reaction.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user_graphql.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/stargazers.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/tags.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/team_members.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/team_memberships.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/teams.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/users.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_jobs.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_runs.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflows.json +0 -0
- {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/spec.json +0 -0
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: airbyte-source-github
|
|
3
|
-
Version: 1.8.39
|
|
3
|
+
Version: 2.1.5
|
|
4
4
|
Summary: Source implementation for GitHub.
|
|
5
5
|
Home-page: https://airbyte.com
|
|
6
|
-
License:
|
|
6
|
+
License: ELv2
|
|
7
7
|
Author: Airbyte
|
|
8
8
|
Author-email: contact@airbyte.io
|
|
9
|
-
Requires-Python: >=3.10,<3.
|
|
10
|
-
Classifier: License ::
|
|
9
|
+
Requires-Python: >=3.10,<3.14
|
|
10
|
+
Classifier: License :: Other/Proprietary License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: airbyte-cdk (>=7.4.1,<8.0.0)
|
|
15
17
|
Requires-Dist: sgqlc (==16.3)
|
|
16
18
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
|
|
17
19
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
|
@@ -3,11 +3,11 @@ requires = [ "poetry-core>=1.0.0",]
|
|
|
3
3
|
build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
|
-
version = "1.8.39"
|
|
6
|
+
version = "2.1.5"
|
|
7
7
|
name = "airbyte-source-github"
|
|
8
8
|
description = "Source implementation for GitHub."
|
|
9
9
|
authors = [ "Airbyte <contact@airbyte.io>",]
|
|
10
|
-
license = "
|
|
10
|
+
license = "ELv2"
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
documentation = "https://docs.airbyte.com/integrations/sources/github"
|
|
13
13
|
homepage = "https://airbyte.com"
|
|
@@ -16,8 +16,8 @@ repository = "https://github.com/airbytehq/airbyte"
|
|
|
16
16
|
include = "source_github"
|
|
17
17
|
|
|
18
18
|
[tool.poetry.dependencies]
|
|
19
|
-
python = "^3.10,<3.
|
|
20
|
-
airbyte-cdk = "^4"
|
|
19
|
+
python = "^3.10,<3.14"
|
|
20
|
+
airbyte-cdk = "^7.4.1"
|
|
21
21
|
sgqlc = "==16.3"
|
|
22
22
|
|
|
23
23
|
[tool.poetry.scripts]
|
|
@@ -45,4 +45,4 @@ include = [
|
|
|
45
45
|
test-unit-tests.shell = '''
|
|
46
46
|
poetry run pytest --junitxml=build/test-results/pytest-unit-tests-junit.xml --ignore=unit_tests/integration/ unit_tests
|
|
47
47
|
poetry run pytest --junitxml=build/test-results/pytest-unit-integration-tests-junit.xml unit_tests/integration/
|
|
48
|
-
'''
|
|
48
|
+
'''
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/config_migrations.py
RENAMED
|
@@ -6,8 +6,11 @@ import logging
|
|
|
6
6
|
from abc import ABC
|
|
7
7
|
from typing import Any, List, Mapping
|
|
8
8
|
|
|
9
|
+
import orjson
|
|
10
|
+
|
|
9
11
|
from airbyte_cdk.config_observation import create_connector_config_control_message
|
|
10
12
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
|
13
|
+
from airbyte_cdk.models import AirbyteMessageSerializer
|
|
11
14
|
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
|
12
15
|
|
|
13
16
|
from .source import SourceGithub
|
|
@@ -72,7 +75,7 @@ class MigrateStringToArray(ABC):
|
|
|
72
75
|
cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
|
|
73
76
|
# emit the Airbyte Control Message from message queue to stdout
|
|
74
77
|
for message in cls.message_repository._message_queue:
|
|
75
|
-
print(message.
|
|
78
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
|
|
76
79
|
|
|
77
80
|
@classmethod
|
|
78
81
|
def migrate(cls, args: List[str], source: SourceGithub) -> None:
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/errors_handlers.py
RENAMED
|
@@ -6,10 +6,10 @@ from typing import Optional, Union
|
|
|
6
6
|
|
|
7
7
|
import requests
|
|
8
8
|
|
|
9
|
+
from airbyte_cdk.models import FailureType
|
|
9
10
|
from airbyte_cdk.sources.streams.http import HttpStream
|
|
10
11
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
|
|
11
12
|
from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
|
|
12
|
-
from airbyte_protocol.models import FailureType
|
|
13
13
|
|
|
14
14
|
from . import constants
|
|
15
15
|
|
|
@@ -174,8 +174,8 @@
|
|
|
174
174
|
"properties": {
|
|
175
175
|
"url": { "type": "string" },
|
|
176
176
|
"total_count": { "type": "integer" },
|
|
177
|
-
"
|
|
178
|
-
"
|
|
177
|
+
"plus_one": { "type": "integer" },
|
|
178
|
+
"minus_one": { "type": "integer" },
|
|
179
179
|
"laugh": { "type": "integer" },
|
|
180
180
|
"confused": { "type": "integer" },
|
|
181
181
|
"heart": { "type": "integer" },
|
|
@@ -99,8 +99,8 @@
|
|
|
99
99
|
"properties": {
|
|
100
100
|
"url": { "type": "string" },
|
|
101
101
|
"total_count": { "type": "integer" },
|
|
102
|
-
"
|
|
103
|
-
"
|
|
102
|
+
"plus_one": { "type": "integer" },
|
|
103
|
+
"minus_one": { "type": "integer" },
|
|
104
104
|
"laugh": { "type": "integer" },
|
|
105
105
|
"confused": { "type": "integer" },
|
|
106
106
|
"heart": { "type": "integer" },
|
|
@@ -752,10 +752,10 @@
|
|
|
752
752
|
"total_count": {
|
|
753
753
|
"type": "integer"
|
|
754
754
|
},
|
|
755
|
-
"
|
|
755
|
+
"plus_one": {
|
|
756
756
|
"type": "integer"
|
|
757
757
|
},
|
|
758
|
-
"
|
|
758
|
+
"minus_one": {
|
|
759
759
|
"type": "integer"
|
|
760
760
|
},
|
|
761
761
|
"laugh": {
|
|
@@ -788,10 +788,10 @@
|
|
|
788
788
|
"total_count": {
|
|
789
789
|
"type": "integer"
|
|
790
790
|
},
|
|
791
|
-
"
|
|
791
|
+
"plus_one": {
|
|
792
792
|
"type": "integer"
|
|
793
793
|
},
|
|
794
|
-
"
|
|
794
|
+
"minus_one": {
|
|
795
795
|
"type": "integer"
|
|
796
796
|
},
|
|
797
797
|
"laugh": {
|
|
@@ -9,6 +9,7 @@ from urllib.parse import urlparse
|
|
|
9
9
|
from airbyte_cdk.models import FailureType
|
|
10
10
|
from airbyte_cdk.sources import AbstractSource
|
|
11
11
|
from airbyte_cdk.sources.streams import Stream
|
|
12
|
+
from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
|
|
12
13
|
from airbyte_cdk.sources.streams.http.requests_native_auth import MultipleTokenAuthenticator
|
|
13
14
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
14
15
|
from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter
|
|
@@ -184,7 +185,7 @@ class SourceGithub(AbstractSource):
|
|
|
184
185
|
# 404 Client Error: Not Found for url: https://api.github.com/orgs/airbytehqBLA/repos?per_page=100
|
|
185
186
|
org_name = message.split("https://api.github.com/orgs/")[1].split("/")[0]
|
|
186
187
|
user_message = f'Organization name: "{org_name}" is unknown, "repository" config option should be updated. Please validate your repository config.'
|
|
187
|
-
elif "401 Client Error: Unauthorized for url" in message:
|
|
188
|
+
elif "401 Client Error: Unauthorized for url" in message or ("Error: Unauthorized" in message and "401" in message):
|
|
188
189
|
# 401 Client Error: Unauthorized for url: https://api.github.com/orgs/datarootsio/repos?per_page=100&sort=updated&direction=desc
|
|
189
190
|
user_message = (
|
|
190
191
|
"Github credentials have expired or changed, please review your credentials and re-authenticate or renew your access token."
|
|
@@ -203,6 +204,9 @@ class SourceGithub(AbstractSource):
|
|
|
203
204
|
)
|
|
204
205
|
return True, None
|
|
205
206
|
|
|
207
|
+
except MessageRepresentationAirbyteTracedErrors as e:
|
|
208
|
+
user_message = self.user_friendly_error_message(e.message)
|
|
209
|
+
return False, user_message or e.message
|
|
206
210
|
except Exception as e:
|
|
207
211
|
message = repr(e)
|
|
208
212
|
user_message = self.user_friendly_error_message(message)
|
|
@@ -4,14 +4,15 @@
|
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
|
+
from datetime import timedelta, timezone
|
|
7
8
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
|
8
9
|
from urllib import parse
|
|
9
10
|
|
|
10
|
-
import pendulum
|
|
11
11
|
import requests
|
|
12
|
+
from dateutil.parser import parse as date_parse
|
|
12
13
|
|
|
13
14
|
from airbyte_cdk import BackoffStrategy, StreamSlice
|
|
14
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
|
|
15
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level, SyncMode
|
|
15
16
|
from airbyte_cdk.models import Type as MessageType
|
|
16
17
|
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
|
17
18
|
from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
|
|
@@ -19,8 +20,9 @@ from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
|
|
|
19
20
|
from airbyte_cdk.sources.streams.http import HttpStream
|
|
20
21
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
|
|
21
22
|
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
|
|
23
|
+
from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
|
|
22
24
|
from airbyte_cdk.utils import AirbyteTracedException
|
|
23
|
-
from
|
|
25
|
+
from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_parse
|
|
24
26
|
|
|
25
27
|
from . import constants
|
|
26
28
|
from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
|
|
@@ -128,11 +130,14 @@ class GithubStreamABC(HttpStream, ABC):
|
|
|
128
130
|
# Reading records while handling the errors
|
|
129
131
|
try:
|
|
130
132
|
yield from super().read_records(stream_slice=stream_slice, **kwargs)
|
|
131
|
-
|
|
133
|
+
# HTTP Client wraps DefaultBackoffException into MessageRepresentationAirbyteTracedErrors
|
|
134
|
+
except MessageRepresentationAirbyteTracedErrors as e:
|
|
132
135
|
# This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
|
|
133
136
|
# function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
|
|
134
137
|
# Bocked on https://github.com/airbytehq/airbyte/issues/3514.
|
|
135
|
-
if e
|
|
138
|
+
if not hasattr(e, "_exception") and not hasattr(e._exception, "response"):
|
|
139
|
+
raise e
|
|
140
|
+
if e._exception.response.status_code == requests.codes.NOT_FOUND:
|
|
136
141
|
# A lot of streams are not available for repositories owned by a user instead of an organization.
|
|
137
142
|
if isinstance(self, Organizations):
|
|
138
143
|
error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{organisation}`."
|
|
@@ -140,8 +145,8 @@ class GithubStreamABC(HttpStream, ABC):
|
|
|
140
145
|
error_msg = f"Syncing `{self.__class__.__name__}` stream for organization `{organisation}`, team `{stream_slice.get('team_slug')}` and user `{stream_slice.get('username')}` isn't available: User has no team membership. Skipping..."
|
|
141
146
|
else:
|
|
142
147
|
error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`."
|
|
143
|
-
elif e.response.status_code == requests.codes.FORBIDDEN:
|
|
144
|
-
error_msg = str(e.response.json().get("message"))
|
|
148
|
+
elif e._exception.response.status_code == requests.codes.FORBIDDEN:
|
|
149
|
+
error_msg = str(e._exception.response.json().get("message"))
|
|
145
150
|
# When using the `check_connection` method, we should raise an error if we do not have access to the repository.
|
|
146
151
|
if isinstance(self, Repositories):
|
|
147
152
|
raise e
|
|
@@ -157,27 +162,27 @@ class GithubStreamABC(HttpStream, ABC):
|
|
|
157
162
|
error_msg = (
|
|
158
163
|
f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
|
|
159
164
|
)
|
|
160
|
-
elif e.response.status_code == requests.codes.UNAUTHORIZED:
|
|
165
|
+
elif e._exception.response.status_code == requests.codes.UNAUTHORIZED:
|
|
161
166
|
if self.access_token_type == constants.PERSONAL_ACCESS_TOKEN_TITLE:
|
|
162
|
-
error_msg = str(e.response.json().get("message"))
|
|
167
|
+
error_msg = str(e._exception.response.json().get("message"))
|
|
163
168
|
self.logger.error(f"{self.access_token_type} renewal is required: {error_msg}")
|
|
164
169
|
raise e
|
|
165
|
-
elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
|
|
170
|
+
elif e._exception.response.status_code == requests.codes.GONE and isinstance(self, Projects):
|
|
166
171
|
# Some repos don't have projects enabled and we we get "410 Client Error: Gone for
|
|
167
172
|
# url: https://api.github.com/repos/xyz/projects?per_page=100" error.
|
|
168
173
|
error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
|
|
169
|
-
elif e.response.status_code == requests.codes.CONFLICT:
|
|
174
|
+
elif e._exception.response.status_code == requests.codes.CONFLICT:
|
|
170
175
|
error_msg = (
|
|
171
176
|
f"Syncing `{self.name}` stream isn't available for repository "
|
|
172
177
|
f"`{stream_slice['repository']}`, it seems like this repository is empty."
|
|
173
178
|
)
|
|
174
|
-
elif e.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
|
|
179
|
+
elif e._exception.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
|
|
175
180
|
error_msg = f"Syncing `{self.name}` stream isn't available for repository `{stream_slice['repository']}`."
|
|
176
|
-
elif e.response.status_code == requests.codes.BAD_GATEWAY:
|
|
181
|
+
elif e._exception.response.status_code == requests.codes.BAD_GATEWAY:
|
|
177
182
|
error_msg = f"Stream {self.name} temporary failed. Try to re-run sync later"
|
|
178
183
|
else:
|
|
179
184
|
# most probably here we're facing a 500 server error and a risk to get a non-json response, so lets output response.text
|
|
180
|
-
self.logger.error(f"Undefined error while reading records: {e.response.text}")
|
|
185
|
+
self.logger.error(f"Undefined error while reading records: {e._exception.response.text}")
|
|
181
186
|
raise e
|
|
182
187
|
|
|
183
188
|
self.logger.warning(error_msg)
|
|
@@ -216,6 +221,14 @@ class GithubStream(GithubStreamABC):
|
|
|
216
221
|
|
|
217
222
|
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
|
|
218
223
|
record["repository"] = stream_slice["repository"]
|
|
224
|
+
|
|
225
|
+
if "reactions" in record and record["reactions"]:
|
|
226
|
+
reactions = record["reactions"]
|
|
227
|
+
if "+1" in reactions:
|
|
228
|
+
reactions["plus_one"] = reactions.pop("+1")
|
|
229
|
+
if "-1" in reactions:
|
|
230
|
+
reactions["minus_one"] = reactions.pop("-1")
|
|
231
|
+
|
|
219
232
|
return record
|
|
220
233
|
|
|
221
234
|
def parse_response(
|
|
@@ -1437,7 +1450,8 @@ class Workflows(SemiIncrementalMixin, GithubStream):
|
|
|
1437
1450
|
yield self.transform(record=record, stream_slice=stream_slice)
|
|
1438
1451
|
|
|
1439
1452
|
def convert_cursor_value(self, value):
|
|
1440
|
-
|
|
1453
|
+
parsed_value = date_parse(value).astimezone(timezone.utc)
|
|
1454
|
+
return ab_datetime_format(parsed_value, "%Y-%m-%dT%H:%M:%SZ")
|
|
1441
1455
|
|
|
1442
1456
|
|
|
1443
1457
|
class WorkflowRuns(SemiIncrementalMixin, GithubStream):
|
|
@@ -1478,7 +1492,7 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
|
|
|
1478
1492
|
# the state is updated only in the end of the sync as records are sorted in reverse order
|
|
1479
1493
|
new_state = self.state
|
|
1480
1494
|
if start_point:
|
|
1481
|
-
break_point = (
|
|
1495
|
+
break_point = (ab_datetime_parse(start_point) - timedelta(days=self.re_run_period)).isoformat()
|
|
1482
1496
|
for record in super(SemiIncrementalMixin, self).read_records(
|
|
1483
1497
|
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
|
1484
1498
|
):
|
|
@@ -1663,22 +1677,24 @@ class ContributorActivity(GithubStream):
|
|
|
1663
1677
|
repository = stream_slice.get("repository", "")
|
|
1664
1678
|
try:
|
|
1665
1679
|
yield from super().read_records(stream_slice=stream_slice, **kwargs)
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1680
|
+
# HTTP Client wraps BackoffException into MessageRepresentationAirbyteTracedErrors
|
|
1681
|
+
except MessageRepresentationAirbyteTracedErrors as e:
|
|
1682
|
+
if hasattr(e, "_exception") and hasattr(e._exception, "response"):
|
|
1683
|
+
if e._exception.response.status_code == requests.codes.ACCEPTED:
|
|
1684
|
+
yield AirbyteMessage(
|
|
1685
|
+
type=MessageType.LOG,
|
|
1686
|
+
log=AirbyteLogMessage(
|
|
1687
|
+
level=Level.INFO,
|
|
1688
|
+
message=f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.",
|
|
1689
|
+
),
|
|
1690
|
+
)
|
|
1675
1691
|
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1692
|
+
# In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
|
|
1693
|
+
# partition after we give up the maximum number of retries on the 202 response. This does lead to the question
|
|
1694
|
+
# of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
|
|
1695
|
+
partition_obj = stream_slice.get("partition")
|
|
1696
|
+
if self.cursor and partition_obj:
|
|
1697
|
+
self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
|
|
1682
1698
|
else:
|
|
1683
1699
|
raise e
|
|
1684
1700
|
|
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
import logging
|
|
5
5
|
import time
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
+
from datetime import timedelta
|
|
7
8
|
from itertools import cycle
|
|
8
9
|
from typing import Any, List, Mapping
|
|
9
10
|
|
|
10
|
-
import pendulum
|
|
11
11
|
import requests
|
|
12
12
|
|
|
13
|
-
from airbyte_cdk.models import SyncMode
|
|
13
|
+
from airbyte_cdk.models import FailureType, SyncMode
|
|
14
14
|
from airbyte_cdk.sources.streams import Stream
|
|
15
|
+
from airbyte_cdk.sources.streams.http import HttpClient
|
|
15
16
|
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
|
|
16
17
|
from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
|
|
18
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
|
19
|
+
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
def getter(D: dict, key_or_keys, strict=True):
|
|
@@ -43,8 +46,8 @@ class GitHubAPILimitException(Exception):
|
|
|
43
46
|
class Token:
|
|
44
47
|
count_rest: int = 5000
|
|
45
48
|
count_graphql: int = 5000
|
|
46
|
-
reset_at_rest:
|
|
47
|
-
reset_at_graphql:
|
|
49
|
+
reset_at_rest: AirbyteDateTime = ab_datetime_now()
|
|
50
|
+
reset_at_graphql: AirbyteDateTime = ab_datetime_now()
|
|
48
51
|
|
|
49
52
|
|
|
50
53
|
class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
|
|
@@ -55,17 +58,33 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
|
|
|
55
58
|
the first token becomes available again.
|
|
56
59
|
"""
|
|
57
60
|
|
|
58
|
-
DURATION =
|
|
61
|
+
DURATION = timedelta(seconds=3600) # Duration at which the current rate limit window resets
|
|
59
62
|
|
|
60
63
|
def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"):
|
|
64
|
+
self._logger = logging.getLogger("airbyte")
|
|
61
65
|
self._auth_method = auth_method
|
|
62
66
|
self._auth_header = auth_header
|
|
63
67
|
self._tokens = {t: Token() for t in tokens}
|
|
68
|
+
# It would've been nice to instantiate a single client on this authenticator. However, we are checking
|
|
69
|
+
# the limits of each token which is associated with a TokenAuthenticator. And each HttpClient can only
|
|
70
|
+
# correspond to one authenticator.
|
|
71
|
+
self._token_to_http_client: Mapping[str, HttpClient] = self._initialize_http_clients(tokens)
|
|
64
72
|
self.check_all_tokens()
|
|
65
73
|
self._tokens_iter = cycle(self._tokens)
|
|
66
74
|
self._active_token = next(self._tokens_iter)
|
|
67
75
|
self._max_time = 60 * 10 # 10 minutes as default
|
|
68
76
|
|
|
77
|
+
def _initialize_http_clients(self, tokens: List[str]) -> Mapping[str, HttpClient]:
|
|
78
|
+
return {
|
|
79
|
+
token: HttpClient(
|
|
80
|
+
name="token_validator",
|
|
81
|
+
logger=self._logger,
|
|
82
|
+
authenticator=TokenAuthenticator(token, auth_method=self._auth_method),
|
|
83
|
+
use_cache=False, # We don't want to reuse cached valued because rate limit values change frequently
|
|
84
|
+
)
|
|
85
|
+
for token in tokens
|
|
86
|
+
}
|
|
87
|
+
|
|
69
88
|
@property
|
|
70
89
|
def auth_header(self) -> str:
|
|
71
90
|
return self._auth_header
|
|
@@ -113,25 +132,38 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
|
|
|
113
132
|
|
|
114
133
|
def _check_token_limits(self, token: str):
|
|
115
134
|
"""check that token is not limited"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
135
|
+
|
|
136
|
+
http_client = self._token_to_http_client.get(token)
|
|
137
|
+
if not http_client:
|
|
138
|
+
raise ValueError("No HttpClient was initialized for this token. This is unexpected. Please contact Airbyte support.")
|
|
139
|
+
|
|
140
|
+
_, response = http_client.send_request(
|
|
141
|
+
http_method="GET",
|
|
142
|
+
url="https://api.github.com/rate_limit",
|
|
143
|
+
headers={"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"},
|
|
144
|
+
request_kwargs={},
|
|
123
145
|
)
|
|
146
|
+
|
|
147
|
+
response_body = response.json()
|
|
148
|
+
if "resources" not in response_body:
|
|
149
|
+
raise AirbyteTracedException(
|
|
150
|
+
failure_type=FailureType.config_error,
|
|
151
|
+
internal_message=f"Token rate limit info response did not contain expected key: resources",
|
|
152
|
+
message="Unable to validate token. Please double check that specified authentication tokens are correct",
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
rate_limit_info = response_body.get("resources")
|
|
124
156
|
token_info = self._tokens[token]
|
|
125
157
|
remaining_info_core = rate_limit_info.get("core")
|
|
126
158
|
token_info.count_rest, token_info.reset_at_rest = (
|
|
127
159
|
remaining_info_core.get("remaining"),
|
|
128
|
-
|
|
160
|
+
ab_datetime_parse(remaining_info_core.get("reset")),
|
|
129
161
|
)
|
|
130
162
|
|
|
131
163
|
remaining_info_graphql = rate_limit_info.get("graphql")
|
|
132
164
|
token_info.count_graphql, token_info.reset_at_graphql = (
|
|
133
165
|
remaining_info_graphql.get("remaining"),
|
|
134
|
-
|
|
166
|
+
ab_datetime_parse(remaining_info_graphql.get("reset")),
|
|
135
167
|
)
|
|
136
168
|
|
|
137
169
|
def check_all_tokens(self):
|
|
@@ -143,7 +175,7 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
|
|
|
143
175
|
setattr(current_token, count_attr, getattr(current_token, count_attr) - 1)
|
|
144
176
|
return True
|
|
145
177
|
elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()):
|
|
146
|
-
min_time_to_wait = min((getattr(x, reset_attr) -
|
|
178
|
+
min_time_to_wait = min((getattr(x, reset_attr) - ab_datetime_now()).total_seconds() for x in self._tokens.values())
|
|
147
179
|
if min_time_to_wait < self.max_time:
|
|
148
180
|
time.sleep(min_time_to_wait if min_time_to_wait > 0 else 0)
|
|
149
181
|
self.check_all_tokens()
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/backoff_strategies.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/assignees.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/branches.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/comments.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commits.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/deployments.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/events.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_events.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_labels.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issues.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects_v2.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/releases.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/repositories.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/reviews.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/stargazers.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/tags.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/team_members.json
RENAMED
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/teams.json
RENAMED
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/users.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflows.json
RENAMED
|
File without changes
|
|
File without changes
|