airbyte-source-github 1.8.13__tar.gz → 2.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/PKG-INFO +7 -5
  2. airbyte_source_github-2.1.5/pyproject.toml +48 -0
  3. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/__init__.py +1 -0
  4. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/backoff_strategies.py +1 -0
  5. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/config_migrations.py +7 -7
  6. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/errors_handlers.py +18 -1
  7. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/github_schema.py +1 -0
  8. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/comment.json +2 -2
  9. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/commented.json +2 -2
  10. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/cross_referenced.json +4 -4
  11. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reactions.json +2 -2
  12. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/source.py +5 -2
  13. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/streams.py +74 -40
  14. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/utils.py +53 -19
  15. airbyte_source_github-1.8.13/pyproject.toml +0 -36
  16. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/README.md +0 -0
  17. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/constants.py +0 -0
  18. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/graphql.py +1 -1
  19. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/run.py +0 -0
  20. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/assignees.json +0 -0
  21. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/branches.json +0 -0
  22. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/collaborators.json +0 -0
  23. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/comments.json +0 -0
  24. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comment_reactions.json +0 -0
  25. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comments.json +0 -0
  26. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/commits.json +0 -0
  27. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/contributor_activity.json +0 -0
  28. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/deployments.json +0 -0
  29. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/events.json +0 -0
  30. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_comment_reactions.json +0 -0
  31. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_events.json +0 -0
  32. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_labels.json +0 -0
  33. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_milestones.json +0 -0
  34. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_reactions.json +0 -0
  35. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issue_timeline_events.json +0 -0
  36. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/issues.json +0 -0
  37. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/organizations.json +0 -0
  38. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/project_cards.json +0 -0
  39. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/project_columns.json +0 -0
  40. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/projects.json +0 -0
  41. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/projects_v2.json +0 -0
  42. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  43. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_commits.json +0 -0
  44. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_stats.json +0 -0
  45. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/pull_requests.json +0 -0
  46. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/releases.json +0 -0
  47. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/repositories.json +0 -0
  48. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/review_comments.json +0 -0
  49. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/reviews.json +0 -0
  50. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/committed.json +0 -0
  51. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/reviewed.json +0 -0
  52. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reaction.json +0 -0
  53. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user.json +0 -0
  54. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user_graphql.json +0 -0
  55. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/stargazers.json +0 -0
  56. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/tags.json +0 -0
  57. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/team_members.json +0 -0
  58. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/team_memberships.json +0 -0
  59. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/teams.json +0 -0
  60. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/users.json +0 -0
  61. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_jobs.json +0 -0
  62. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_runs.json +0 -0
  63. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/schemas/workflows.json +0 -0
  64. {airbyte_source_github-1.8.13 → airbyte_source_github-2.1.5}/source_github/spec.json +0 -0
@@ -1,17 +1,19 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-github
3
- Version: 1.8.13
3
+ Version: 2.1.5
4
4
  Summary: Source implementation for GitHub.
5
5
  Home-page: https://airbyte.com
6
- License: MIT
6
+ License: ELv2
7
7
  Author: Airbyte
8
8
  Author-email: contact@airbyte.io
9
- Requires-Python: >=3.10,<3.12
10
- Classifier: License :: OSI Approved :: MIT License
9
+ Requires-Python: >=3.10,<3.14
10
+ Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
- Requires-Dist: airbyte-cdk (>=4,<5)
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: airbyte-cdk (>=7.4.1,<8.0.0)
15
17
  Requires-Dist: sgqlc (==16.3)
16
18
  Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
17
19
  Project-URL: Repository, https://github.com/airbytehq/airbyte
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = [ "poetry-core>=1.0.0",]
3
+ build-backend = "poetry.core.masonry.api"
4
+
5
+ [tool.poetry]
6
+ version = "2.1.5"
7
+ name = "airbyte-source-github"
8
+ description = "Source implementation for GitHub."
9
+ authors = [ "Airbyte <contact@airbyte.io>",]
10
+ license = "ELv2"
11
+ readme = "README.md"
12
+ documentation = "https://docs.airbyte.com/integrations/sources/github"
13
+ homepage = "https://airbyte.com"
14
+ repository = "https://github.com/airbytehq/airbyte"
15
+ [[tool.poetry.packages]]
16
+ include = "source_github"
17
+
18
+ [tool.poetry.dependencies]
19
+ python = "^3.10,<3.14"
20
+ airbyte-cdk = "^7.4.1"
21
+ sgqlc = "==16.3"
22
+
23
+ [tool.poetry.scripts]
24
+ source-github = "source_github.run:run"
25
+
26
+ [tool.poetry.group.dev.dependencies]
27
+ requests-mock = "^1.9.3"
28
+ freezegun = "^1.2"
29
+ pytest-mock = "^3.6.1"
30
+ pytest = "^8.0.0"
31
+ responses = "^0.23.1"
32
+
33
+
34
+ [tool.poe]
35
+ include = [
36
+ # Shared tasks definition file(s) can be imported here.
37
+ # Run `poe` or `poe --help` to see the list of available tasks.
38
+ "${POE_GIT_DIR}/poe-tasks/poetry-connector-tasks.toml",
39
+ ]
40
+
41
+ [tool.poe.tasks]
42
+ # This overrides the default task for running unit tests specifically for the source-github connector.
43
+ # This is a workaround to an issue that arises when running all of the unit tests in one execution of pytest
44
+ # due to incompatibilities between request mocking libraries requests-mock and responses.
45
+ test-unit-tests.shell = '''
46
+ poetry run pytest --junitxml=build/test-results/pytest-unit-tests-junit.xml --ignore=unit_tests/integration/ unit_tests
47
+ poetry run pytest --junitxml=build/test-results/pytest-unit-integration-tests-junit.xml unit_tests/integration/
48
+ '''
@@ -24,4 +24,5 @@ SOFTWARE.
24
24
 
25
25
  from .source import SourceGithub
26
26
 
27
+
27
28
  __all__ = ["SourceGithub"]
@@ -6,6 +6,7 @@ import time
6
6
  from typing import Any, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk import BackoffStrategy
10
11
  from airbyte_cdk.sources.streams.http import HttpStream
11
12
 
@@ -6,12 +6,16 @@ import logging
6
6
  from abc import ABC
7
7
  from typing import Any, List, Mapping
8
8
 
9
+ import orjson
10
+
9
11
  from airbyte_cdk.config_observation import create_connector_config_control_message
10
12
  from airbyte_cdk.entrypoint import AirbyteEntrypoint
13
+ from airbyte_cdk.models import AirbyteMessageSerializer
11
14
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
12
15
 
13
16
  from .source import SourceGithub
14
17
 
18
+
15
19
  logger = logging.getLogger("airbyte_logger")
16
20
 
17
21
 
@@ -30,13 +34,11 @@ class MigrateStringToArray(ABC):
30
34
 
31
35
  @property
32
36
  @abc.abstractmethod
33
- def migrate_from_key(self) -> str:
34
- ...
37
+ def migrate_from_key(self) -> str: ...
35
38
 
36
39
  @property
37
40
  @abc.abstractmethod
38
- def migrate_to_key(self) -> str:
39
- ...
41
+ def migrate_to_key(self) -> str: ...
40
42
 
41
43
  @classmethod
42
44
  def _should_migrate(cls, config: Mapping[str, Any]) -> bool:
@@ -73,7 +75,7 @@ class MigrateStringToArray(ABC):
73
75
  cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
74
76
  # emit the Airbyte Control Message from message queue to stdout
75
77
  for message in cls.message_repository._message_queue:
76
- print(message.json(exclude_unset=True))
78
+ print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
77
79
 
78
80
  @classmethod
79
81
  def migrate(cls, args: List[str], source: SourceGithub) -> None:
@@ -95,12 +97,10 @@ class MigrateStringToArray(ABC):
95
97
 
96
98
 
97
99
  class MigrateRepository(MigrateStringToArray):
98
-
99
100
  migrate_from_key: str = "repository"
100
101
  migrate_to_key: str = "repositories"
101
102
 
102
103
 
103
104
  class MigrateBranch(MigrateStringToArray):
104
-
105
105
  migrate_from_key: str = "branch"
106
106
  migrate_to_key: str = "branches"
@@ -5,13 +5,15 @@
5
5
  from typing import Optional, Union
6
6
 
7
7
  import requests
8
+
9
+ from airbyte_cdk.models import FailureType
8
10
  from airbyte_cdk.sources.streams.http import HttpStream
9
11
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
10
12
  from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
11
- from airbyte_protocol.models import FailureType
12
13
 
13
14
  from . import constants
14
15
 
16
+
15
17
  GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
16
18
  401: ErrorResolution(
17
19
  response_action=ResponseAction.RETRY,
@@ -41,6 +43,13 @@ GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
41
43
  }
42
44
 
43
45
 
46
+ def is_conflict_with_empty_repository(response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> bool:
47
+ if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.CONFLICT:
48
+ response_data = response_or_exception.json()
49
+ return response_data.get("message") == "Git Repository is empty."
50
+ return False
51
+
52
+
44
53
  class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
45
54
  def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa
46
55
  self.stream = stream
@@ -86,6 +95,14 @@ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
86
95
  error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
87
96
  )
88
97
 
98
+ if is_conflict_with_empty_repository(response_or_exception=response_or_exception):
99
+ log_message = f"Ignoring response for '{response_or_exception.request.method}' request to '{response_or_exception.url}' with response code '{response_or_exception.status_code}' as the repository is empty."
100
+ return ErrorResolution(
101
+ response_action=ResponseAction.IGNORE,
102
+ failure_type=FailureType.config_error,
103
+ error_message=log_message,
104
+ )
105
+
89
106
  return super().interpret_response(response_or_exception)
90
107
 
91
108
 
@@ -6,6 +6,7 @@ import sgqlc.types
6
6
  import sgqlc.types.datetime
7
7
  import sgqlc.types.relay
8
8
 
9
+
9
10
  github_schema = sgqlc.types.Schema()
10
11
 
11
12
 
@@ -174,8 +174,8 @@
174
174
  "properties": {
175
175
  "url": { "type": "string" },
176
176
  "total_count": { "type": "integer" },
177
- "+1": { "type": "integer" },
178
- "-1": { "type": "integer" },
177
+ "plus_one": { "type": "integer" },
178
+ "minus_one": { "type": "integer" },
179
179
  "laugh": { "type": "integer" },
180
180
  "confused": { "type": "integer" },
181
181
  "heart": { "type": "integer" },
@@ -99,8 +99,8 @@
99
99
  "properties": {
100
100
  "url": { "type": "string" },
101
101
  "total_count": { "type": "integer" },
102
- "+1": { "type": "integer" },
103
- "-1": { "type": "integer" },
102
+ "plus_one": { "type": "integer" },
103
+ "minus_one": { "type": "integer" },
104
104
  "laugh": { "type": "integer" },
105
105
  "confused": { "type": "integer" },
106
106
  "heart": { "type": "integer" },
@@ -752,10 +752,10 @@
752
752
  "total_count": {
753
753
  "type": "integer"
754
754
  },
755
- "+1": {
755
+ "plus_one": {
756
756
  "type": "integer"
757
757
  },
758
- "-1": {
758
+ "minus_one": {
759
759
  "type": "integer"
760
760
  },
761
761
  "laugh": {
@@ -788,10 +788,10 @@
788
788
  "total_count": {
789
789
  "type": "integer"
790
790
  },
791
- "+1": {
791
+ "plus_one": {
792
792
  "type": "integer"
793
793
  },
794
- "-1": {
794
+ "minus_one": {
795
795
  "type": "integer"
796
796
  },
797
797
  "laugh": {
@@ -7,10 +7,10 @@
7
7
  "total_count": {
8
8
  "type": ["null", "integer"]
9
9
  },
10
- "+1": {
10
+ "plus_one": {
11
11
  "type": ["null", "integer"]
12
12
  },
13
- "-1": {
13
+ "minus_one": {
14
14
  "type": ["null", "integer"]
15
15
  },
16
16
  "laugh": {
@@ -9,6 +9,7 @@ from urllib.parse import urlparse
9
9
  from airbyte_cdk.models import FailureType
10
10
  from airbyte_cdk.sources import AbstractSource
11
11
  from airbyte_cdk.sources.streams import Stream
12
+ from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
12
13
  from airbyte_cdk.sources.streams.http.requests_native_auth import MultipleTokenAuthenticator
13
14
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
14
15
  from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter
@@ -60,7 +61,6 @@ from .utils import read_full_refresh
60
61
 
61
62
 
62
63
  class SourceGithub(AbstractSource):
63
-
64
64
  continue_sync_on_stream_failure = True
65
65
 
66
66
  @staticmethod
@@ -185,7 +185,7 @@ class SourceGithub(AbstractSource):
185
185
  # 404 Client Error: Not Found for url: https://api.github.com/orgs/airbytehqBLA/repos?per_page=100
186
186
  org_name = message.split("https://api.github.com/orgs/")[1].split("/")[0]
187
187
  user_message = f'Organization name: "{org_name}" is unknown, "repository" config option should be updated. Please validate your repository config.'
188
- elif "401 Client Error: Unauthorized for url" in message:
188
+ elif "401 Client Error: Unauthorized for url" in message or ("Error: Unauthorized" in message and "401" in message):
189
189
  # 401 Client Error: Unauthorized for url: https://api.github.com/orgs/datarootsio/repos?per_page=100&sort=updated&direction=desc
190
190
  user_message = (
191
191
  "Github credentials have expired or changed, please review your credentials and re-authenticate or renew your access token."
@@ -204,6 +204,9 @@ class SourceGithub(AbstractSource):
204
204
  )
205
205
  return True, None
206
206
 
207
+ except MessageRepresentationAirbyteTracedErrors as e:
208
+ user_message = self.user_friendly_error_message(e.message)
209
+ return False, user_message or e.message
207
210
  except Exception as e:
208
211
  message = repr(e)
209
212
  user_message = self.user_friendly_error_message(message)
@@ -4,13 +4,15 @@
4
4
 
5
5
  import re
6
6
  from abc import ABC, abstractmethod
7
+ from datetime import timedelta, timezone
7
8
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
8
9
  from urllib import parse
9
10
 
10
- import pendulum
11
11
  import requests
12
+ from dateutil.parser import parse as date_parse
13
+
12
14
  from airbyte_cdk import BackoffStrategy, StreamSlice
13
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
15
+ from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level, SyncMode
14
16
  from airbyte_cdk.models import Type as MessageType
15
17
  from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
16
18
  from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
@@ -18,8 +20,9 @@ from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
18
20
  from airbyte_cdk.sources.streams.http import HttpStream
19
21
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
20
22
  from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
23
+ from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
21
24
  from airbyte_cdk.utils import AirbyteTracedException
22
- from airbyte_protocol.models import FailureType
25
+ from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_parse
23
26
 
24
27
  from . import constants
25
28
  from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
@@ -28,6 +31,7 @@ from .errors_handlers import (
28
31
  ContributorActivityErrorHandler,
29
32
  GitHubGraphQLErrorHandler,
30
33
  GithubStreamABCErrorHandler,
34
+ is_conflict_with_empty_repository,
31
35
  )
32
36
  from .graphql import (
33
37
  CursorStorage,
@@ -41,7 +45,6 @@ from .utils import GitHubAPILimitException, getter
41
45
 
42
46
 
43
47
  class GithubStreamABC(HttpStream, ABC):
44
-
45
48
  primary_key = "id"
46
49
 
47
50
  # Detect streams with high API load
@@ -80,7 +83,6 @@ class GithubStreamABC(HttpStream, ABC):
80
83
  def request_params(
81
84
  self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
82
85
  ) -> MutableMapping[str, Any]:
83
-
84
86
  params = {"per_page": self.page_size}
85
87
 
86
88
  if next_page_token:
@@ -128,11 +130,14 @@ class GithubStreamABC(HttpStream, ABC):
128
130
  # Reading records while handling the errors
129
131
  try:
130
132
  yield from super().read_records(stream_slice=stream_slice, **kwargs)
131
- except DefaultBackoffException as e:
133
+ # HTTP Client wraps DefaultBackoffException into MessageRepresentationAirbyteTracedErrors
134
+ except MessageRepresentationAirbyteTracedErrors as e:
132
135
  # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
133
136
  # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
134
137
  # Bocked on https://github.com/airbytehq/airbyte/issues/3514.
135
- if e.response.status_code == requests.codes.NOT_FOUND:
138
+ if not hasattr(e, "_exception") and not hasattr(e._exception, "response"):
139
+ raise e
140
+ if e._exception.response.status_code == requests.codes.NOT_FOUND:
136
141
  # A lot of streams are not available for repositories owned by a user instead of an organization.
137
142
  if isinstance(self, Organizations):
138
143
  error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{organisation}`."
@@ -140,8 +145,8 @@ class GithubStreamABC(HttpStream, ABC):
140
145
  error_msg = f"Syncing `{self.__class__.__name__}` stream for organization `{organisation}`, team `{stream_slice.get('team_slug')}` and user `{stream_slice.get('username')}` isn't available: User has no team membership. Skipping..."
141
146
  else:
142
147
  error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`."
143
- elif e.response.status_code == requests.codes.FORBIDDEN:
144
- error_msg = str(e.response.json().get("message"))
148
+ elif e._exception.response.status_code == requests.codes.FORBIDDEN:
149
+ error_msg = str(e._exception.response.json().get("message"))
145
150
  # When using the `check_connection` method, we should raise an error if we do not have access to the repository.
146
151
  if isinstance(self, Repositories):
147
152
  raise e
@@ -157,27 +162,27 @@ class GithubStreamABC(HttpStream, ABC):
157
162
  error_msg = (
158
163
  f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
159
164
  )
160
- elif e.response.status_code == requests.codes.UNAUTHORIZED:
165
+ elif e._exception.response.status_code == requests.codes.UNAUTHORIZED:
161
166
  if self.access_token_type == constants.PERSONAL_ACCESS_TOKEN_TITLE:
162
- error_msg = str(e.response.json().get("message"))
167
+ error_msg = str(e._exception.response.json().get("message"))
163
168
  self.logger.error(f"{self.access_token_type} renewal is required: {error_msg}")
164
169
  raise e
165
- elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
170
+ elif e._exception.response.status_code == requests.codes.GONE and isinstance(self, Projects):
166
171
  # Some repos don't have projects enabled and we we get "410 Client Error: Gone for
167
172
  # url: https://api.github.com/repos/xyz/projects?per_page=100" error.
168
173
  error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
169
- elif e.response.status_code == requests.codes.CONFLICT:
174
+ elif e._exception.response.status_code == requests.codes.CONFLICT:
170
175
  error_msg = (
171
176
  f"Syncing `{self.name}` stream isn't available for repository "
172
177
  f"`{stream_slice['repository']}`, it seems like this repository is empty."
173
178
  )
174
- elif e.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
179
+ elif e._exception.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
175
180
  error_msg = f"Syncing `{self.name}` stream isn't available for repository `{stream_slice['repository']}`."
176
- elif e.response.status_code == requests.codes.BAD_GATEWAY:
181
+ elif e._exception.response.status_code == requests.codes.BAD_GATEWAY:
177
182
  error_msg = f"Stream {self.name} temporary failed. Try to re-run sync later"
178
183
  else:
179
184
  # most probably here we're facing a 500 server error and a risk to get a non-json response, so lets output response.text
180
- self.logger.error(f"Undefined error while reading records: {e.response.text}")
185
+ self.logger.error(f"Undefined error while reading records: {e._exception.response.text}")
181
186
  raise e
182
187
 
183
188
  self.logger.warning(error_msg)
@@ -216,8 +221,34 @@ class GithubStream(GithubStreamABC):
216
221
 
217
222
  def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
218
223
  record["repository"] = stream_slice["repository"]
224
+
225
+ if "reactions" in record and record["reactions"]:
226
+ reactions = record["reactions"]
227
+ if "+1" in reactions:
228
+ reactions["plus_one"] = reactions.pop("+1")
229
+ if "-1" in reactions:
230
+ reactions["minus_one"] = reactions.pop("-1")
231
+
219
232
  return record
220
233
 
234
+ def parse_response(
235
+ self,
236
+ response: requests.Response,
237
+ stream_state: Mapping[str, Any],
238
+ stream_slice: Mapping[str, Any] = None,
239
+ next_page_token: Mapping[str, Any] = None,
240
+ ) -> Iterable[Mapping]:
241
+ if is_conflict_with_empty_repository(response):
242
+ # I would expect that this should be handled (skipped) by the error handler, but it seems like
243
+ # ignored this error but continue to processing records. This may be fixed in latest CDK versions.
244
+ return
245
+ yield from super().parse_response(
246
+ response=response,
247
+ stream_state=stream_state,
248
+ stream_slice=stream_slice,
249
+ next_page_token=next_page_token,
250
+ )
251
+
221
252
 
222
253
  class SemiIncrementalMixin(CheckpointMixin):
223
254
  """
@@ -756,7 +787,6 @@ class ReviewComments(IncrementalMixin, GithubStream):
756
787
 
757
788
 
758
789
  class GitHubGraphQLStream(GithubStream, ABC):
759
-
760
790
  http_method = "POST"
761
791
 
762
792
  def path(
@@ -976,7 +1006,6 @@ class ProjectsV2(SemiIncrementalMixin, GitHubGraphQLStream):
976
1006
 
977
1007
 
978
1008
  class ReactionStream(GithubStream, CheckpointMixin, ABC):
979
-
980
1009
  parent_key = "id"
981
1010
  copy_parent_key = "comment_id"
982
1011
  cursor_field = "created_at"
@@ -1394,9 +1423,9 @@ class ProjectCards(GithubStream):
1394
1423
  stream_state_value = current_stream_state.get(repository, {}).get(project_id, {}).get(column_id, {}).get(self.cursor_field)
1395
1424
  if stream_state_value:
1396
1425
  updated_state = max(updated_state, stream_state_value)
1397
- current_stream_state.setdefault(repository, {}).setdefault(project_id, {}).setdefault(column_id, {})[
1398
- self.cursor_field
1399
- ] = updated_state
1426
+ current_stream_state.setdefault(repository, {}).setdefault(project_id, {}).setdefault(column_id, {})[self.cursor_field] = (
1427
+ updated_state
1428
+ )
1400
1429
  return current_stream_state
1401
1430
 
1402
1431
  def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
@@ -1421,7 +1450,8 @@ class Workflows(SemiIncrementalMixin, GithubStream):
1421
1450
  yield self.transform(record=record, stream_slice=stream_slice)
1422
1451
 
1423
1452
  def convert_cursor_value(self, value):
1424
- return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
1453
+ parsed_value = date_parse(value).astimezone(timezone.utc)
1454
+ return ab_datetime_format(parsed_value, "%Y-%m-%dT%H:%M:%SZ")
1425
1455
 
1426
1456
 
1427
1457
  class WorkflowRuns(SemiIncrementalMixin, GithubStream):
@@ -1462,7 +1492,7 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
1462
1492
  # the state is updated only in the end of the sync as records are sorted in reverse order
1463
1493
  new_state = self.state
1464
1494
  if start_point:
1465
- break_point = (pendulum.parse(start_point) - pendulum.duration(days=self.re_run_period)).to_iso8601_string()
1495
+ break_point = (ab_datetime_parse(start_point) - timedelta(days=self.re_run_period)).isoformat()
1466
1496
  for record in super(SemiIncrementalMixin, self).read_records(
1467
1497
  sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
1468
1498
  ):
@@ -1617,11 +1647,13 @@ class ContributorActivity(GithubStream):
1617
1647
 
1618
1648
  def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
1619
1649
  record["repository"] = stream_slice["repository"]
1620
- record.update(record.pop("author"))
1650
+ author = record.pop("author", None)
1651
+ # It's been found that the author field can be None, so we check for it
1652
+ if author:
1653
+ record.update(author)
1621
1654
  return record
1622
1655
 
1623
1656
  def get_error_handler(self) -> Optional[ErrorHandler]:
1624
-
1625
1657
  return ContributorActivityErrorHandler(logger=self.logger, max_retries=5, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING)
1626
1658
 
1627
1659
  def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
@@ -1645,22 +1677,24 @@ class ContributorActivity(GithubStream):
1645
1677
  repository = stream_slice.get("repository", "")
1646
1678
  try:
1647
1679
  yield from super().read_records(stream_slice=stream_slice, **kwargs)
1648
- except UserDefinedBackoffException as e:
1649
- if e.response.status_code == requests.codes.ACCEPTED:
1650
- yield AirbyteMessage(
1651
- type=MessageType.LOG,
1652
- log=AirbyteLogMessage(
1653
- level=Level.INFO,
1654
- message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
1655
- ),
1656
- )
1680
+ # HTTP Client wraps BackoffException into MessageRepresentationAirbyteTracedErrors
1681
+ except MessageRepresentationAirbyteTracedErrors as e:
1682
+ if hasattr(e, "_exception") and hasattr(e._exception, "response"):
1683
+ if e._exception.response.status_code == requests.codes.ACCEPTED:
1684
+ yield AirbyteMessage(
1685
+ type=MessageType.LOG,
1686
+ log=AirbyteLogMessage(
1687
+ level=Level.INFO,
1688
+ message=f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.",
1689
+ ),
1690
+ )
1657
1691
 
1658
- # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
1659
- # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
1660
- # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
1661
- partition_obj = stream_slice.get("partition")
1662
- if self.cursor and partition_obj:
1663
- self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
1692
+ # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
1693
+ # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
1694
+ # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
1695
+ partition_obj = stream_slice.get("partition")
1696
+ if self.cursor and partition_obj:
1697
+ self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
1664
1698
  else:
1665
1699
  raise e
1666
1700
 
@@ -1,18 +1,22 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
4
+ import logging
5
5
  import time
6
6
  from dataclasses import dataclass
7
+ from datetime import timedelta
7
8
  from itertools import cycle
8
9
  from typing import Any, List, Mapping
9
10
 
10
- import pendulum
11
11
  import requests
12
- from airbyte_cdk.models import SyncMode
12
+
13
+ from airbyte_cdk.models import FailureType, SyncMode
13
14
  from airbyte_cdk.sources.streams import Stream
15
+ from airbyte_cdk.sources.streams.http import HttpClient
14
16
  from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
15
17
  from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
18
+ from airbyte_cdk.utils import AirbyteTracedException
19
+ from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
16
20
 
17
21
 
18
22
  def getter(D: dict, key_or_keys, strict=True):
@@ -42,8 +46,8 @@ class GitHubAPILimitException(Exception):
42
46
  class Token:
43
47
  count_rest: int = 5000
44
48
  count_graphql: int = 5000
45
- reset_at_rest: pendulum.DateTime = pendulum.now()
46
- reset_at_graphql: pendulum.DateTime = pendulum.now()
49
+ reset_at_rest: AirbyteDateTime = ab_datetime_now()
50
+ reset_at_graphql: AirbyteDateTime = ab_datetime_now()
47
51
 
48
52
 
49
53
  class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
@@ -54,17 +58,33 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
54
58
  the first token becomes available again.
55
59
  """
56
60
 
57
- DURATION = pendulum.duration(seconds=3600) # Duration at which the current rate limit window resets
61
+ DURATION = timedelta(seconds=3600) # Duration at which the current rate limit window resets
58
62
 
59
63
  def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"):
64
+ self._logger = logging.getLogger("airbyte")
60
65
  self._auth_method = auth_method
61
66
  self._auth_header = auth_header
62
67
  self._tokens = {t: Token() for t in tokens}
68
+ # It would've been nice to instantiate a single client on this authenticator. However, we are checking
69
+ # the limits of each token which is associated with a TokenAuthenticator. And each HttpClient can only
70
+ # correspond to one authenticator.
71
+ self._token_to_http_client: Mapping[str, HttpClient] = self._initialize_http_clients(tokens)
63
72
  self.check_all_tokens()
64
73
  self._tokens_iter = cycle(self._tokens)
65
74
  self._active_token = next(self._tokens_iter)
66
75
  self._max_time = 60 * 10 # 10 minutes as default
67
76
 
77
+ def _initialize_http_clients(self, tokens: List[str]) -> Mapping[str, HttpClient]:
78
+ return {
79
+ token: HttpClient(
80
+ name="token_validator",
81
+ logger=self._logger,
82
+ authenticator=TokenAuthenticator(token, auth_method=self._auth_method),
83
+ use_cache=False, # We don't want to reuse cached values because rate limit values change frequently
84
+ )
85
+ for token in tokens
86
+ }
87
+
68
88
  @property
69
89
  def auth_header(self) -> str:
70
90
  return self._auth_header
@@ -99,7 +119,6 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
99
119
 
100
120
  @property
101
121
  def token(self) -> str:
102
-
103
122
  token = self.current_active_token
104
123
  return f"{self._auth_method} {token}"
105
124
 
@@ -113,23 +132,38 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
113
132
 
114
133
  def _check_token_limits(self, token: str):
115
134
  """check that token is not limited"""
116
- headers = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"}
117
- rate_limit_info = (
118
- requests.get(
119
- "https://api.github.com/rate_limit", headers=headers, auth=TokenAuthenticator(token, auth_method=self._auth_method)
120
- )
121
- .json()
122
- .get("resources")
135
+
136
+ http_client = self._token_to_http_client.get(token)
137
+ if not http_client:
138
+ raise ValueError("No HttpClient was initialized for this token. This is unexpected. Please contact Airbyte support.")
139
+
140
+ _, response = http_client.send_request(
141
+ http_method="GET",
142
+ url="https://api.github.com/rate_limit",
143
+ headers={"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"},
144
+ request_kwargs={},
123
145
  )
146
+
147
+ response_body = response.json()
148
+ if "resources" not in response_body:
149
+ raise AirbyteTracedException(
150
+ failure_type=FailureType.config_error,
151
+ internal_message=f"Token rate limit info response did not contain expected key: resources",
152
+ message="Unable to validate token. Please double check that specified authentication tokens are correct",
153
+ )
154
+
155
+ rate_limit_info = response_body.get("resources")
124
156
  token_info = self._tokens[token]
125
157
  remaining_info_core = rate_limit_info.get("core")
126
- token_info.count_rest, token_info.reset_at_rest = remaining_info_core.get("remaining"), pendulum.from_timestamp(
127
- remaining_info_core.get("reset")
158
+ token_info.count_rest, token_info.reset_at_rest = (
159
+ remaining_info_core.get("remaining"),
160
+ ab_datetime_parse(remaining_info_core.get("reset")),
128
161
  )
129
162
 
130
163
  remaining_info_graphql = rate_limit_info.get("graphql")
131
- token_info.count_graphql, token_info.reset_at_graphql = remaining_info_graphql.get("remaining"), pendulum.from_timestamp(
132
- remaining_info_graphql.get("reset")
164
+ token_info.count_graphql, token_info.reset_at_graphql = (
165
+ remaining_info_graphql.get("remaining"),
166
+ ab_datetime_parse(remaining_info_graphql.get("reset")),
133
167
  )
134
168
 
135
169
  def check_all_tokens(self):
@@ -141,7 +175,7 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
141
175
  setattr(current_token, count_attr, getattr(current_token, count_attr) - 1)
142
176
  return True
143
177
  elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()):
144
- min_time_to_wait = min((getattr(x, reset_attr) - pendulum.now()).in_seconds() for x in self._tokens.values())
178
+ min_time_to_wait = min((getattr(x, reset_attr) - ab_datetime_now()).total_seconds() for x in self._tokens.values())
145
179
  if min_time_to_wait < self.max_time:
146
180
  time.sleep(min_time_to_wait if min_time_to_wait > 0 else 0)
147
181
  self.check_all_tokens()
@@ -1,36 +0,0 @@
1
- [build-system]
2
- requires = [
3
- "poetry-core>=1.0.0",
4
- ]
5
- build-backend = "poetry.core.masonry.api"
6
-
7
- [tool.poetry]
8
- version = "1.8.13"
9
- name = "airbyte-source-github"
10
- description = "Source implementation for GitHub."
11
- authors = [
12
- "Airbyte <contact@airbyte.io>",
13
- ]
14
- license = "MIT"
15
- readme = "README.md"
16
- documentation = "https://docs.airbyte.com/integrations/sources/github"
17
- homepage = "https://airbyte.com"
18
- repository = "https://github.com/airbytehq/airbyte"
19
- packages = [
20
- { include = "source_github" },
21
- ]
22
-
23
- [tool.poetry.dependencies]
24
- python = "^3.10,<3.12"
25
- airbyte-cdk = "^4"
26
- sgqlc = "==16.3"
27
-
28
- [tool.poetry.scripts]
29
- source-github = "source_github.run:run"
30
-
31
- [tool.poetry.group.dev.dependencies]
32
- requests-mock = "^1.9.3"
33
- freezegun = "^1.2"
34
- pytest-mock = "^3.6.1"
35
- pytest = "^6.2"
36
- responses = "^0.23.1"
@@ -11,6 +11,7 @@ from sgqlc.operation import Selector
11
11
 
12
12
  from . import github_schema
13
13
 
14
+
14
15
  _schema = github_schema
15
16
  _schema_root = _schema.github_schema
16
17
 
@@ -165,7 +166,6 @@ def get_query_issue_reactions(owner, name, first, after, number=None):
165
166
 
166
167
 
167
168
  class QueryReactions:
168
-
169
169
  # AVERAGE_REVIEWS - optimal number of reviews to fetch inside every pull request.
170
170
  # If we try to fetch too many (up to 100) we will spend too many scores of query cost.
171
171
  # https://docs.github.com/en/graphql/overview/resource-limitations#calculating-a-rate-limit-score-before-running-the-call