airbyte-source-github 1.8.39__tar.gz → 2.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/PKG-INFO +7 -5
  2. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/pyproject.toml +5 -5
  3. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/__init__.py +1 -0
  4. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/config_migrations.py +4 -1
  5. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/errors_handlers.py +1 -1
  6. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/comment.json +2 -2
  7. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/commented.json +2 -2
  8. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/cross_referenced.json +4 -4
  9. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reactions.json +2 -2
  10. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/source.py +5 -1
  11. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/streams.py +47 -31
  12. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/utils.py +48 -16
  13. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/README.md +0 -0
  14. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/backoff_strategies.py +0 -0
  15. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/constants.py +0 -0
  16. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/github_schema.py +0 -0
  17. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/graphql.py +0 -0
  18. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/run.py +0 -0
  19. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/assignees.json +0 -0
  20. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/branches.json +0 -0
  21. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/collaborators.json +0 -0
  22. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/comments.json +0 -0
  23. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comment_reactions.json +0 -0
  24. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commit_comments.json +0 -0
  25. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/commits.json +0 -0
  26. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/contributor_activity.json +0 -0
  27. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/deployments.json +0 -0
  28. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/events.json +0 -0
  29. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_comment_reactions.json +0 -0
  30. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_events.json +0 -0
  31. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_labels.json +0 -0
  32. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_milestones.json +0 -0
  33. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_reactions.json +0 -0
  34. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issue_timeline_events.json +0 -0
  35. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/issues.json +0 -0
  36. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/organizations.json +0 -0
  37. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/project_cards.json +0 -0
  38. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/project_columns.json +0 -0
  39. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects.json +0 -0
  40. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/projects_v2.json +0 -0
  41. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  42. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_commits.json +0 -0
  43. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_request_stats.json +0 -0
  44. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/pull_requests.json +0 -0
  45. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/releases.json +0 -0
  46. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/repositories.json +0 -0
  47. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/review_comments.json +0 -0
  48. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/reviews.json +0 -0
  49. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/committed.json +0 -0
  50. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/events/reviewed.json +0 -0
  51. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/reaction.json +0 -0
  52. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user.json +0 -0
  53. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/shared/user_graphql.json +0 -0
  54. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/stargazers.json +0 -0
  55. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/tags.json +0 -0
  56. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/team_members.json +0 -0
  57. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/team_memberships.json +0 -0
  58. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/teams.json +0 -0
  59. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/users.json +0 -0
  60. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_jobs.json +0 -0
  61. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflow_runs.json +0 -0
  62. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/schemas/workflows.json +0 -0
  63. {airbyte_source_github-1.8.39 → airbyte_source_github-2.1.5}/source_github/spec.json +0 -0
--- a/airbyte_source_github-1.8.39/PKG-INFO
+++ b/airbyte_source_github-2.1.5/PKG-INFO
@@ -1,17 +1,19 @@
 Metadata-Version: 2.1
 Name: airbyte-source-github
-Version: 1.8.39
+Version: 2.1.5
 Summary: Source implementation for GitHub.
 Home-page: https://airbyte.com
-License: MIT
+License: ELv2
 Author: Airbyte
 Author-email: contact@airbyte.io
-Requires-Python: >=3.10,<3.12
-Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10,<3.14
+Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: airbyte-cdk (>=4,<5)
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: airbyte-cdk (>=7.4.1,<8.0.0)
 Requires-Dist: sgqlc (==16.3)
 Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
 Project-URL: Repository, https://github.com/airbytehq/airbyte
--- a/airbyte_source_github-1.8.39/pyproject.toml
+++ b/airbyte_source_github-2.1.5/pyproject.toml
@@ -3,11 +3,11 @@ requires = [ "poetry-core>=1.0.0",]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry]
-version = "1.8.39"
+version = "2.1.5"
 name = "airbyte-source-github"
 description = "Source implementation for GitHub."
 authors = [ "Airbyte <contact@airbyte.io>",]
-license = "MIT"
+license = "ELv2"
 readme = "README.md"
 documentation = "https://docs.airbyte.com/integrations/sources/github"
 homepage = "https://airbyte.com"
@@ -16,8 +16,8 @@ repository = "https://github.com/airbytehq/airbyte"
 include = "source_github"

 [tool.poetry.dependencies]
-python = "^3.10,<3.12"
-airbyte-cdk = "^4"
+python = "^3.10,<3.14"
+airbyte-cdk = "^7.4.1"
 sgqlc = "==16.3"

 [tool.poetry.scripts]
@@ -45,4 +45,4 @@ include = [
 test-unit-tests.shell = '''
 poetry run pytest --junitxml=build/test-results/pytest-unit-tests-junit.xml --ignore=unit_tests/integration/ unit_tests
 poetry run pytest --junitxml=build/test-results/pytest-unit-integration-tests-junit.xml unit_tests/integration/
-'''
+'''
--- a/airbyte_source_github-1.8.39/source_github/__init__.py
+++ b/airbyte_source_github-2.1.5/source_github/__init__.py
@@ -24,4 +24,5 @@ SOFTWARE.

 from .source import SourceGithub

+
 __all__ = ["SourceGithub"]
--- a/airbyte_source_github-1.8.39/source_github/config_migrations.py
+++ b/airbyte_source_github-2.1.5/source_github/config_migrations.py
@@ -6,8 +6,11 @@ import logging
 from abc import ABC
 from typing import Any, List, Mapping

+import orjson
+
 from airbyte_cdk.config_observation import create_connector_config_control_message
 from airbyte_cdk.entrypoint import AirbyteEntrypoint
+from airbyte_cdk.models import AirbyteMessageSerializer
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository

 from .source import SourceGithub
@@ -72,7 +75,7 @@ class MigrateStringToArray(ABC):
         cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
         # emit the Airbyte Control Message from message queue to stdout
         for message in cls.message_repository._message_queue:
-            print(message.json(exclude_unset=True))
+            print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())

     @classmethod
     def migrate(cls, args: List[str], source: SourceGithub) -> None:
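For context on the serialization change above: the CDK versions targeted here no longer expose Pydantic's `.json()` on `AirbyteMessage`, so the control message is rendered through the CDK's serializer instead. A minimal sketch of the new emission path, assuming only what the hunk itself imports (`orjson` and `AirbyteMessageSerializer` from `airbyte_cdk.models`):

```python
# Sketch of the migrated control-message emission (assumes airbyte-cdk >= 7).
import orjson

from airbyte_cdk.models import AirbyteMessage, AirbyteMessageSerializer


def emit_message(message: AirbyteMessage) -> None:
    # dump() converts the message object into plain dicts; orjson renders compact
    # JSON bytes, and decode() yields the single line written to stdout.
    print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
```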
--- a/airbyte_source_github-1.8.39/source_github/errors_handlers.py
+++ b/airbyte_source_github-2.1.5/source_github/errors_handlers.py
@@ -6,10 +6,10 @@ from typing import Optional, Union

 import requests

+from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.streams.http import HttpStream
 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
 from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
-from airbyte_protocol.models import FailureType

 from . import constants

--- a/airbyte_source_github-1.8.39/source_github/schemas/shared/events/comment.json
+++ b/airbyte_source_github-2.1.5/source_github/schemas/shared/events/comment.json
@@ -174,8 +174,8 @@
       "properties": {
         "url": { "type": "string" },
         "total_count": { "type": "integer" },
-        "+1": { "type": "integer" },
-        "-1": { "type": "integer" },
+        "plus_one": { "type": "integer" },
+        "minus_one": { "type": "integer" },
         "laugh": { "type": "integer" },
         "confused": { "type": "integer" },
         "heart": { "type": "integer" },
--- a/airbyte_source_github-1.8.39/source_github/schemas/shared/events/commented.json
+++ b/airbyte_source_github-2.1.5/source_github/schemas/shared/events/commented.json
@@ -99,8 +99,8 @@
       "properties": {
         "url": { "type": "string" },
         "total_count": { "type": "integer" },
-        "+1": { "type": "integer" },
-        "-1": { "type": "integer" },
+        "plus_one": { "type": "integer" },
+        "minus_one": { "type": "integer" },
         "laugh": { "type": "integer" },
         "confused": { "type": "integer" },
         "heart": { "type": "integer" },
--- a/airbyte_source_github-1.8.39/source_github/schemas/shared/events/cross_referenced.json
+++ b/airbyte_source_github-2.1.5/source_github/schemas/shared/events/cross_referenced.json
@@ -752,10 +752,10 @@
         "total_count": {
           "type": "integer"
         },
-        "+1": {
+        "plus_one": {
           "type": "integer"
         },
-        "-1": {
+        "minus_one": {
           "type": "integer"
         },
         "laugh": {
@@ -788,10 +788,10 @@
         "total_count": {
           "type": "integer"
         },
-        "+1": {
+        "plus_one": {
           "type": "integer"
         },
-        "-1": {
+        "minus_one": {
           "type": "integer"
         },
         "laugh": {
--- a/airbyte_source_github-1.8.39/source_github/schemas/shared/reactions.json
+++ b/airbyte_source_github-2.1.5/source_github/schemas/shared/reactions.json
@@ -7,10 +7,10 @@
     "total_count": {
      "type": ["null", "integer"]
     },
-    "+1": {
+    "plus_one": {
      "type": ["null", "integer"]
     },
-    "-1": {
+    "minus_one": {
      "type": ["null", "integer"]
     },
     "laugh": {
--- a/airbyte_source_github-1.8.39/source_github/source.py
+++ b/airbyte_source_github-2.1.5/source_github/source.py
@@ -9,6 +9,7 @@ from urllib.parse import urlparse
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources import AbstractSource
 from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
 from airbyte_cdk.sources.streams.http.requests_native_auth import MultipleTokenAuthenticator
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter
@@ -184,7 +185,7 @@ class SourceGithub(AbstractSource):
            # 404 Client Error: Not Found for url: https://api.github.com/orgs/airbytehqBLA/repos?per_page=100
            org_name = message.split("https://api.github.com/orgs/")[1].split("/")[0]
            user_message = f'Organization name: "{org_name}" is unknown, "repository" config option should be updated. Please validate your repository config.'
-        elif "401 Client Error: Unauthorized for url" in message:
+        elif "401 Client Error: Unauthorized for url" in message or ("Error: Unauthorized" in message and "401" in message):
            # 401 Client Error: Unauthorized for url: https://api.github.com/orgs/datarootsio/repos?per_page=100&sort=updated&direction=desc
            user_message = (
                "Github credentials have expired or changed, please review your credentials and re-authenticate or renew your access token."
@@ -203,6 +204,9 @@ class SourceGithub(AbstractSource):
            )
            return True, None

+        except MessageRepresentationAirbyteTracedErrors as e:
+            user_message = self.user_friendly_error_message(e.message)
+            return False, user_message or e.message
        except Exception as e:
            message = repr(e)
            user_message = self.user_friendly_error_message(message)
--- a/airbyte_source_github-1.8.39/source_github/streams.py
+++ b/airbyte_source_github-2.1.5/source_github/streams.py
@@ -4,14 +4,15 @@

 import re
 from abc import ABC, abstractmethod
+from datetime import timedelta, timezone
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 from urllib import parse

-import pendulum
 import requests
+from dateutil.parser import parse as date_parse

 from airbyte_cdk import BackoffStrategy, StreamSlice
-from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
+from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level, SyncMode
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
 from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
@@ -19,8 +20,9 @@ from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
 from airbyte_cdk.sources.streams.http import HttpStream
 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
 from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
+from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
 from airbyte_cdk.utils import AirbyteTracedException
-from airbyte_protocol.models import FailureType
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_parse

 from . import constants
 from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
@@ -128,11 +130,14 @@ class GithubStreamABC(HttpStream, ABC):
         # Reading records while handling the errors
         try:
             yield from super().read_records(stream_slice=stream_slice, **kwargs)
-        except DefaultBackoffException as e:
+        # HTTP Client wraps DefaultBackoffException into MessageRepresentationAirbyteTracedErrors
+        except MessageRepresentationAirbyteTracedErrors as e:
             # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
             # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
             # Bocked on https://github.com/airbytehq/airbyte/issues/3514.
-            if e.response.status_code == requests.codes.NOT_FOUND:
+            if not hasattr(e, "_exception") and not hasattr(e._exception, "response"):
+                raise e
+            if e._exception.response.status_code == requests.codes.NOT_FOUND:
                 # A lot of streams are not available for repositories owned by a user instead of an organization.
                 if isinstance(self, Organizations):
                     error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{organisation}`."
@@ -140,8 +145,8 @@ class GithubStreamABC(HttpStream, ABC):
                     error_msg = f"Syncing `{self.__class__.__name__}` stream for organization `{organisation}`, team `{stream_slice.get('team_slug')}` and user `{stream_slice.get('username')}` isn't available: User has no team membership. Skipping..."
                 else:
                     error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`."
-            elif e.response.status_code == requests.codes.FORBIDDEN:
-                error_msg = str(e.response.json().get("message"))
+            elif e._exception.response.status_code == requests.codes.FORBIDDEN:
+                error_msg = str(e._exception.response.json().get("message"))
                 # When using the `check_connection` method, we should raise an error if we do not have access to the repository.
                 if isinstance(self, Repositories):
                     raise e
@@ -157,27 +162,27 @@ class GithubStreamABC(HttpStream, ABC):
                 error_msg = (
                     f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
                 )
-            elif e.response.status_code == requests.codes.UNAUTHORIZED:
+            elif e._exception.response.status_code == requests.codes.UNAUTHORIZED:
                 if self.access_token_type == constants.PERSONAL_ACCESS_TOKEN_TITLE:
-                    error_msg = str(e.response.json().get("message"))
+                    error_msg = str(e._exception.response.json().get("message"))
                     self.logger.error(f"{self.access_token_type} renewal is required: {error_msg}")
                 raise e
-            elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
+            elif e._exception.response.status_code == requests.codes.GONE and isinstance(self, Projects):
                 # Some repos don't have projects enabled and we we get "410 Client Error: Gone for
                 # url: https://api.github.com/repos/xyz/projects?per_page=100" error.
                 error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
-            elif e.response.status_code == requests.codes.CONFLICT:
+            elif e._exception.response.status_code == requests.codes.CONFLICT:
                 error_msg = (
                     f"Syncing `{self.name}` stream isn't available for repository "
                     f"`{stream_slice['repository']}`, it seems like this repository is empty."
                 )
-            elif e.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
+            elif e._exception.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
                 error_msg = f"Syncing `{self.name}` stream isn't available for repository `{stream_slice['repository']}`."
-            elif e.response.status_code == requests.codes.BAD_GATEWAY:
+            elif e._exception.response.status_code == requests.codes.BAD_GATEWAY:
                 error_msg = f"Stream {self.name} temporary failed. Try to re-run sync later"
             else:
                 # most probably here we're facing a 500 server error and a risk to get a non-json response, so lets output response.text
-                self.logger.error(f"Undefined error while reading records: {e.response.text}")
+                self.logger.error(f"Undefined error while reading records: {e._exception.response.text}")
                 raise e

         self.logger.warning(error_msg)
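Every branch above reaches the original `requests` exception through the wrapper's private `_exception` attribute. A hypothetical helper (not part of the connector) showing that lookup done defensively with `getattr`:

```python
from typing import Optional

import requests


def wrapped_status_code(error: Exception) -> Optional[int]:
    # MessageRepresentationAirbyteTracedErrors keeps the original exception on
    # `_exception`; for HTTP errors that exception carries a requests.Response.
    underlying = getattr(error, "_exception", None)
    response: Optional[requests.Response] = getattr(underlying, "response", None)
    return response.status_code if response is not None else None
```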
@@ -216,6 +221,14 @@ class GithubStream(GithubStreamABC):

     def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
         record["repository"] = stream_slice["repository"]
+
+        if "reactions" in record and record["reactions"]:
+            reactions = record["reactions"]
+            if "+1" in reactions:
+                reactions["plus_one"] = reactions.pop("+1")
+            if "-1" in reactions:
+                reactions["minus_one"] = reactions.pop("-1")
+
         return record

     def parse_response(
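The `plus_one`/`minus_one` keys introduced in the schema hunks earlier are produced by this rewrite in `GithubStream.transform()`. A small illustration of its effect on a record (sample values made up):

```python
record = {
    "id": 1,
    "repository": "airbytehq/airbyte",
    "reactions": {"total_count": 3, "+1": 2, "-1": 1, "laugh": 0},
}

reactions = record["reactions"]
if "+1" in reactions:
    reactions["plus_one"] = reactions.pop("+1")
if "-1" in reactions:
    reactions["minus_one"] = reactions.pop("-1")

# GitHub's "+1"/"-1" keys are gone; the counts survive under the renamed keys.
assert record["reactions"] == {"total_count": 3, "laugh": 0, "plus_one": 2, "minus_one": 1}
```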
@@ -1437,7 +1450,8 @@ class Workflows(SemiIncrementalMixin, GithubStream):
             yield self.transform(record=record, stream_slice=stream_slice)

     def convert_cursor_value(self, value):
-        return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
+        parsed_value = date_parse(value).astimezone(timezone.utc)
+        return ab_datetime_format(parsed_value, "%Y-%m-%dT%H:%M:%SZ")


 class WorkflowRuns(SemiIncrementalMixin, GithubStream):
@@ -1478,7 +1492,7 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
         # the state is updated only in the end of the sync as records are sorted in reverse order
         new_state = self.state
         if start_point:
-            break_point = (pendulum.parse(start_point) - pendulum.duration(days=self.re_run_period)).to_iso8601_string()
+            break_point = (ab_datetime_parse(start_point) - timedelta(days=self.re_run_period)).isoformat()
         for record in super(SemiIncrementalMixin, self).read_records(
             sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
         ):
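Both hunks above replace `pendulum` with stdlib `datetime`/`dateutil` plus the CDK's datetime helpers. A sketch of an equivalent cursor conversion using only `dateutil` and `strftime` (the connector itself calls `ab_datetime_format`, as shown in the hunk):

```python
from datetime import timezone

from dateutil.parser import parse as date_parse


def convert_cursor_value(value: str) -> str:
    # Normalize any parseable timestamp to UTC and render it as
    # "YYYY-MM-DDTHH:MM:SSZ", matching the previous pendulum formatting.
    return date_parse(value).astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


assert convert_cursor_value("2024-05-01T12:30:00+02:00") == "2024-05-01T10:30:00Z"
```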
@@ -1663,22 +1677,24 @@ class ContributorActivity(GithubStream):
         repository = stream_slice.get("repository", "")
         try:
             yield from super().read_records(stream_slice=stream_slice, **kwargs)
-        except UserDefinedBackoffException as e:
-            if e.response.status_code == requests.codes.ACCEPTED:
-                yield AirbyteMessage(
-                    type=MessageType.LOG,
-                    log=AirbyteLogMessage(
-                        level=Level.INFO,
-                        message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
-                    ),
-                )
+        # HTTP Client wraps BackoffException into MessageRepresentationAirbyteTracedErrors
+        except MessageRepresentationAirbyteTracedErrors as e:
+            if hasattr(e, "_exception") and hasattr(e._exception, "response"):
+                if e._exception.response.status_code == requests.codes.ACCEPTED:
+                    yield AirbyteMessage(
+                        type=MessageType.LOG,
+                        log=AirbyteLogMessage(
+                            level=Level.INFO,
+                            message=f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.",
+                        ),
+                    )

-                # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
-                # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
-                # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
-                partition_obj = stream_slice.get("partition")
-                if self.cursor and partition_obj:
-                    self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
+                    # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
+                    # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
+                    # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
+                    partition_obj = stream_slice.get("partition")
+                    if self.cursor and partition_obj:
+                        self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
             else:
                 raise e

--- a/airbyte_source_github-1.8.39/source_github/utils.py
+++ b/airbyte_source_github-2.1.5/source_github/utils.py
@@ -1,19 +1,22 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-
+import logging
 import time
 from dataclasses import dataclass
+from datetime import timedelta
 from itertools import cycle
 from typing import Any, List, Mapping

-import pendulum
 import requests

-from airbyte_cdk.models import SyncMode
+from airbyte_cdk.models import FailureType, SyncMode
 from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.http import HttpClient
 from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
 from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
+from airbyte_cdk.utils import AirbyteTracedException
+from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse


 def getter(D: dict, key_or_keys, strict=True):
@@ -43,8 +46,8 @@ class GitHubAPILimitException(Exception):
 class Token:
     count_rest: int = 5000
     count_graphql: int = 5000
-    reset_at_rest: pendulum.DateTime = pendulum.now()
-    reset_at_graphql: pendulum.DateTime = pendulum.now()
+    reset_at_rest: AirbyteDateTime = ab_datetime_now()
+    reset_at_graphql: AirbyteDateTime = ab_datetime_now()


 class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
@@ -55,17 +58,33 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
     the first token becomes available again.
     """

-    DURATION = pendulum.duration(seconds=3600)  # Duration at which the current rate limit window resets
+    DURATION = timedelta(seconds=3600)  # Duration at which the current rate limit window resets

     def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"):
+        self._logger = logging.getLogger("airbyte")
         self._auth_method = auth_method
         self._auth_header = auth_header
         self._tokens = {t: Token() for t in tokens}
+        # It would've been nice to instantiate a single client on this authenticator. However, we are checking
+        # the limits of each token which is associated with a TokenAuthenticator. And each HttpClient can only
+        # correspond to one authenticator.
+        self._token_to_http_client: Mapping[str, HttpClient] = self._initialize_http_clients(tokens)
         self.check_all_tokens()
         self._tokens_iter = cycle(self._tokens)
         self._active_token = next(self._tokens_iter)
         self._max_time = 60 * 10  # 10 minutes as default

+    def _initialize_http_clients(self, tokens: List[str]) -> Mapping[str, HttpClient]:
+        return {
+            token: HttpClient(
+                name="token_validator",
+                logger=self._logger,
+                authenticator=TokenAuthenticator(token, auth_method=self._auth_method),
+                use_cache=False,  # We don't want to reuse cached valued because rate limit values change frequently
+            )
+            for token in tokens
+        }
+
     @property
     def auth_header(self) -> str:
         return self._auth_header
@@ -113,25 +132,38 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):

     def _check_token_limits(self, token: str):
         """check that token is not limited"""
-        headers = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"}
-        rate_limit_info = (
-            requests.get(
-                "https://api.github.com/rate_limit", headers=headers, auth=TokenAuthenticator(token, auth_method=self._auth_method)
-            )
-            .json()
-            .get("resources")
+
+        http_client = self._token_to_http_client.get(token)
+        if not http_client:
+            raise ValueError("No HttpClient was initialized for this token. This is unexpected. Please contact Airbyte support.")
+
+        _, response = http_client.send_request(
+            http_method="GET",
+            url="https://api.github.com/rate_limit",
+            headers={"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"},
+            request_kwargs={},
         )
+
+        response_body = response.json()
+        if "resources" not in response_body:
+            raise AirbyteTracedException(
+                failure_type=FailureType.config_error,
+                internal_message=f"Token rate limit info response did not contain expected key: resources",
+                message="Unable to validate token. Please double check that specified authentication tokens are correct",
+            )
+
+        rate_limit_info = response_body.get("resources")
         token_info = self._tokens[token]
         remaining_info_core = rate_limit_info.get("core")
         token_info.count_rest, token_info.reset_at_rest = (
             remaining_info_core.get("remaining"),
-            pendulum.from_timestamp(remaining_info_core.get("reset")),
+            ab_datetime_parse(remaining_info_core.get("reset")),
         )

         remaining_info_graphql = rate_limit_info.get("graphql")
         token_info.count_graphql, token_info.reset_at_graphql = (
             remaining_info_graphql.get("remaining"),
-            pendulum.from_timestamp(remaining_info_graphql.get("reset")),
+            ab_datetime_parse(remaining_info_graphql.get("reset")),
         )

     def check_all_tokens(self):
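The parsing above relies on the documented shape of `GET https://api.github.com/rate_limit`, where each resource bucket reports `remaining` and a Unix-timestamp `reset`. A sketch with invented values showing what `_check_token_limits` extracts (the connector feeds `reset` into `ab_datetime_parse`):

```python
# Hypothetical response body for GET https://api.github.com/rate_limit (values invented).
sample_body = {
    "resources": {
        "core": {"limit": 5000, "remaining": 4987, "reset": 1714563600},
        "graphql": {"limit": 5000, "remaining": 5000, "reset": 1714563600},
    }
}

rate_limit_info = sample_body["resources"]
# REST budget and its reset timestamp.
count_rest = rate_limit_info["core"]["remaining"]
reset_at_rest = rate_limit_info["core"]["reset"]
# GraphQL budget is tracked separately because it has its own quota.
count_graphql = rate_limit_info["graphql"]["remaining"]
reset_at_graphql = rate_limit_info["graphql"]["reset"]

assert (count_rest, count_graphql) == (4987, 5000)
```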
@@ -143,7 +175,7 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
             setattr(current_token, count_attr, getattr(current_token, count_attr) - 1)
             return True
         elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()):
-            min_time_to_wait = min((getattr(x, reset_attr) - pendulum.now()).in_seconds() for x in self._tokens.values())
+            min_time_to_wait = min((getattr(x, reset_attr) - ab_datetime_now()).total_seconds() for x in self._tokens.values())
             if min_time_to_wait < self.max_time:
                 time.sleep(min_time_to_wait if min_time_to_wait > 0 else 0)
                 self.check_all_tokens()