airbyte-source-github 1.8.41__py3-none-any.whl → 1.9.0rc1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.


{airbyte_source_github-1.8.41.dist-info → airbyte_source_github-1.9.0rc1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-source-github
-Version: 1.8.41
+Version: 1.9.0rc1
 Summary: Source implementation for GitHub.
 Home-page: https://airbyte.com
 License: MIT
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: airbyte-cdk (>=4,<5)
+Requires-Dist: airbyte-cdk (>=7,<8)
 Requires-Dist: sgqlc (==16.3)
 Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
 Project-URL: Repository, https://github.com/airbytehq/airbyte
{airbyte_source_github-1.8.41.dist-info → airbyte_source_github-1.9.0rc1.dist-info}/RECORD CHANGED
@@ -1,8 +1,8 @@
 source_github/__init__.py,sha256=punPc3v0mXEYOun7cbkfM5KUhgjv72B9DgDhI4VtzcQ,1134
 source_github/backoff_strategies.py,sha256=ChkWEmqZL7Qrr1zidOfjZWlMWIVRVO2yqZ7QE_HaZP8,2287
-source_github/config_migrations.py,sha256=H58hHqAnuvb0B8IXHW4aEDZ3HotEg7HdA2rXDG9XW7A,3832
+source_github/config_migrations.py,sha256=guUJAdNP-liUciVaJB4ackEEMCN4jd7bNd85QFmDNlU,3932
 source_github/constants.py,sha256=Hj3Q4y7OoU-Iff4m9gEC2CjwmWJYXhNbHVNjg8EBLmQ,238
-source_github/errors_handlers.py,sha256=POqpvbrNAoZztjwByOJxJbIaxhWC8KWLi3gK1WzbiK8,7259
+source_github/errors_handlers.py,sha256=nKd3iWnrOjFXqVfqBCWzLLWpVxLpBflv676txxSI6x8,7254
 source_github/github_schema.py,sha256=2AXmTN_s_VqvZAR1GPqus5HhSXWokS4N7HMQQqefaYw,1600315
 source_github/graphql.py,sha256=edd7EoXwD5rRDHrjm4ZBy5VGkruTScL_Xipy4CBmTFc,11625
 source_github/run.py,sha256=onA-rP2aVhWHvDquKZdR1381CU66rnzqJ7EFMS5dd4Q,407
@@ -56,9 +56,9 @@ source_github/schemas/workflow_runs.json,sha256=XDmIsjtzka-ItEonImD3ZATZjxRNkbFo
 source_github/schemas/workflows.json,sha256=gSNw8WZaVKbX4AL97PbjZHzvxcOltXqv9Ao1RNQOFXM,1470
 source_github/source.py,sha256=1o8eayigi4xSUeNHdCd-mhNswGUq_XQrVk2eihTjm1o,14246
 source_github/spec.json,sha256=7LOQm01fP_RvPF-HifhNPJ7i0AxT2LTNPaLAA3uOfNY,7443
-source_github/streams.py,sha256=h5YMPLIsLTv7WX_mcURVC2LmmWBLraZvKH8J_GzV1IE,77441
-source_github/utils.py,sha256=Ztd8VWwzTNUg_A96_8R9XSKorIcBa8wJ6aYUqygRkyk,5492
-airbyte_source_github-1.8.41.dist-info/METADATA,sha256=UCpiuy6xgzPizLpnY_aHd--aI09v4o0asRhFUp3HKZY,5190
-airbyte_source_github-1.8.41.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_source_github-1.8.41.dist-info/entry_points.txt,sha256=gYhqVrTAZvMwuYByg0b_-o115yUFLLcfNxMrLZmiW9k,55
-airbyte_source_github-1.8.41.dist-info/RECORD,,
+source_github/streams.py,sha256=MErXqg35u6l-iBMylyeGitsMr_68aoyz6TqGEyufmgU,78158
+source_github/utils.py,sha256=nSARcWkLTsoyh6c7j8AvxGCgK1uUPDwOe970_1Ywj94,5586
+airbyte_source_github-1.9.0rc1.dist-info/METADATA,sha256=0nrlurzAIyvRxGVKFCqC-eEgmrbNtLNyHIqqYDg5ATY,5192
+airbyte_source_github-1.9.0rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_source_github-1.9.0rc1.dist-info/entry_points.txt,sha256=gYhqVrTAZvMwuYByg0b_-o115yUFLLcfNxMrLZmiW9k,55
+airbyte_source_github-1.9.0rc1.dist-info/RECORD,,
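Note: RECORD rows follow the standard wheel layout `path,sha256=<digest>,size` (PEP 376/427), where the digest is an unpadded urlsafe-base64 SHA-256 of the file. A minimal sketch for spot-checking an installed file against its RECORD row:

    import base64
    import hashlib
    from pathlib import Path

    def record_digest(path: str) -> str:
        """Compute the `sha256=...` value RECORD expects for a file."""
        raw = hashlib.sha256(Path(path).read_bytes()).digest()
        return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

    # record_digest("source_github/streams.py") should equal the hash in its RECORD row.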
source_github/config_migrations.py CHANGED
@@ -6,8 +6,11 @@ import logging
 from abc import ABC
 from typing import Any, List, Mapping

+import orjson
+
 from airbyte_cdk.config_observation import create_connector_config_control_message
 from airbyte_cdk.entrypoint import AirbyteEntrypoint
+from airbyte_cdk.models import AirbyteMessageSerializer
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository

 from .source import SourceGithub
@@ -72,7 +75,7 @@ class MigrateStringToArray(ABC):
         cls.message_repository.emit_message(create_connector_config_control_message(migrated_config))
         # emit the Airbyte Control Message from message queue to stdout
         for message in cls.message_repository._message_queue:
-            print(message.json(exclude_unset=True))
+            print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())

     @classmethod
     def migrate(cls, args: List[str], source: SourceGithub) -> None:
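Note: the control-message print path moves from pydantic's `message.json(exclude_unset=True)` to the CDK's `AirbyteMessageSerializer` plus `orjson`. A minimal sketch of the new pattern, assuming an `AirbyteMessage` instance and an airbyte-cdk version (6+) that exports the serializer from `airbyte_cdk.models`:

    import orjson
    from airbyte_cdk.models import AirbyteMessage, AirbyteMessageSerializer

    def dump_message(message: AirbyteMessage) -> str:
        # dump() yields a plain dict; orjson.dumps() returns bytes, hence .decode()
        return orjson.dumps(AirbyteMessageSerializer.dump(message)).decode()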
source_github/errors_handlers.py CHANGED
@@ -6,10 +6,10 @@ from typing import Optional, Union

 import requests

+from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.streams.http import HttpStream
 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
 from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
-from airbyte_protocol.models import FailureType

 from . import constants

source_github/streams.py CHANGED
@@ -4,14 +4,14 @@

 import re
 from abc import ABC, abstractmethod
+from datetime import timedelta
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 from urllib import parse

-import pendulum
 import requests

 from airbyte_cdk import BackoffStrategy, StreamSlice
-from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
+from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level, SyncMode
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
 from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
@@ -19,8 +19,9 @@ from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
 from airbyte_cdk.sources.streams.http import HttpStream
 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
 from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, UserDefinedBackoffException
+from airbyte_cdk.sources.streams.http.http_client import MessageRepresentationAirbyteTracedErrors
 from airbyte_cdk.utils import AirbyteTracedException
-from airbyte_protocol.models import FailureType
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_parse

 from . import constants
 from .backoff_strategies import ContributorActivityBackoffStrategy, GithubStreamABCBackoffStrategy
@@ -128,11 +129,14 @@ class GithubStreamABC(HttpStream, ABC):
         # Reading records while handling the errors
         try:
             yield from super().read_records(stream_slice=stream_slice, **kwargs)
-        except DefaultBackoffException as e:
+        # HTTP Client wraps DefaultBackoffException into MessageRepresentationAirbyteTracedErrors
+        except MessageRepresentationAirbyteTracedErrors as e:
             # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
             # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
             # Blocked on https://github.com/airbytehq/airbyte/issues/3514.
-            if e.response.status_code == requests.codes.NOT_FOUND:
+            if not hasattr(e, "_exception") and not hasattr(e._exception, "response"):
+                raise e
+            if e._exception.response.status_code == requests.codes.NOT_FOUND:
                 # A lot of streams are not available for repositories owned by a user instead of an organization.
                 if isinstance(self, Organizations):
                     error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{organisation}`."
@@ -140,8 +144,8 @@ class GithubStreamABC(HttpStream, ABC):
                     error_msg = f"Syncing `{self.__class__.__name__}` stream for organization `{organisation}`, team `{stream_slice.get('team_slug')}` and user `{stream_slice.get('username')}` isn't available: User has no team membership. Skipping..."
                 else:
                     error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`."
-            elif e.response.status_code == requests.codes.FORBIDDEN:
-                error_msg = str(e.response.json().get("message"))
+            elif e._exception.response.status_code == requests.codes.FORBIDDEN:
+                error_msg = str(e._exception.response.json().get("message"))
                 # When using the `check_connection` method, we should raise an error if we do not have access to the repository.
                 if isinstance(self, Repositories):
                     raise e
@@ -157,27 +161,27 @@ class GithubStreamABC(HttpStream, ABC):
                 error_msg = (
                     f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
                 )
-            elif e.response.status_code == requests.codes.UNAUTHORIZED:
+            elif e._exception.response.status_code == requests.codes.UNAUTHORIZED:
                 if self.access_token_type == constants.PERSONAL_ACCESS_TOKEN_TITLE:
-                    error_msg = str(e.response.json().get("message"))
+                    error_msg = str(e._exception.response.json().get("message"))
                     self.logger.error(f"{self.access_token_type} renewal is required: {error_msg}")
                 raise e
-            elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
+            elif e._exception.response.status_code == requests.codes.GONE and isinstance(self, Projects):
                 # Some repos don't have projects enabled and we get "410 Client Error: Gone for
                 # url: https://api.github.com/repos/xyz/projects?per_page=100" error.
                 error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
-            elif e.response.status_code == requests.codes.CONFLICT:
+            elif e._exception.response.status_code == requests.codes.CONFLICT:
                 error_msg = (
                     f"Syncing `{self.name}` stream isn't available for repository "
                     f"`{stream_slice['repository']}`, it seems like this repository is empty."
                 )
-            elif e.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
+            elif e._exception.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
                 error_msg = f"Syncing `{self.name}` stream isn't available for repository `{stream_slice['repository']}`."
-            elif e.response.status_code == requests.codes.BAD_GATEWAY:
+            elif e._exception.response.status_code == requests.codes.BAD_GATEWAY:
                 error_msg = f"Stream {self.name} temporary failed. Try to re-run sync later"
             else:
                 # most probably here we're facing a 500 server error and a risk to get a non-json response, so let's output response.text
-                self.logger.error(f"Undefined error while reading records: {e.response.text}")
+                self.logger.error(f"Undefined error while reading records: {e._exception.response.text}")
                 raise e

         self.logger.warning(error_msg)
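Note: the rewritten handler reaches into the traced error's private `_exception` attribute to recover the original HTTP response. As released, the guard joins its two `hasattr` probes with `and`, so an error that lacks `_exception` still evaluates `e._exception` in the second probe; a defensive unwrap could use `getattr` with defaults instead. A sketch only (the helper name is hypothetical, not connector code):

    from typing import Optional

    import requests

    def unwrap_response(err: Exception) -> Optional[requests.Response]:
        """Best-effort recovery of the response carried by a wrapped exception."""
        inner = getattr(err, "_exception", None)  # private CDK attribute, may be absent
        return getattr(inner, "response", None)

    # In the except arm: `response = unwrap_response(e)`; re-raise when it is None,
    # otherwise branch on `response.status_code` as above.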
@@ -1437,7 +1441,7 @@ class Workflows(SemiIncrementalMixin, GithubStream):
             yield self.transform(record=record, stream_slice=stream_slice)

     def convert_cursor_value(self, value):
-        return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
+        return ab_datetime_format(value, "YYYY-MM-DDTHH:mm:ss[Z]")


 class WorkflowRuns(SemiIncrementalMixin, GithubStream):
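Note: pendulum format tokens like `YYYY-MM-DDTHH:mm:ss[Z]` emit a literal trailing `Z`, and the diff passes that same token string to `ab_datetime_format`. For reference, a stdlib-only sketch of what the old expression produced (not connector code):

    from datetime import datetime, timezone

    def to_cursor_format(value: str) -> str:
        # Normalize to UTC, then render with a literal trailing "Z".
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
        return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    assert to_cursor_format("2024-05-01T12:30:00+02:00") == "2024-05-01T10:30:00Z"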
@@ -1478,7 +1482,7 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
         # the state is updated only in the end of the sync as records are sorted in reverse order
         new_state = self.state
         if start_point:
-            break_point = (pendulum.parse(start_point) - pendulum.duration(days=self.re_run_period)).to_iso8601_string()
+            break_point = (ab_datetime_parse(start_point) - timedelta(days=self.re_run_period)).isoformat()
         for record in super(SemiIncrementalMixin, self).read_records(
             sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
         ):
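Note: the re-run window arithmetic swaps `pendulum.duration`/`to_iso8601_string()` for `timedelta`/`isoformat()`. One visible difference: stdlib `isoformat()` renders a UTC offset as `+00:00` where pendulum emitted `Z`. A stdlib illustration with hypothetical values:

    from datetime import datetime, timedelta

    re_run_period = 30  # days; illustrative value
    start_point = "2024-06-30T00:00:00+00:00"

    break_point = (datetime.fromisoformat(start_point) - timedelta(days=re_run_period)).isoformat()
    print(break_point)  # 2024-05-31T00:00:00+00:00 (pendulum would have rendered ...Z)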
@@ -1663,22 +1667,24 @@ class ContributorActivity(GithubStream):
         repository = stream_slice.get("repository", "")
         try:
             yield from super().read_records(stream_slice=stream_slice, **kwargs)
-        except UserDefinedBackoffException as e:
-            if e.response.status_code == requests.codes.ACCEPTED:
-                yield AirbyteMessage(
-                    type=MessageType.LOG,
-                    log=AirbyteLogMessage(
-                        level=Level.INFO,
-                        message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
-                    ),
-                )
+        # HTTP Client wraps BackoffException into MessageRepresentationAirbyteTracedErrors
+        except MessageRepresentationAirbyteTracedErrors as e:
+            if hasattr(e, "_exception") and hasattr(e._exception, "response"):
+                if e._exception.response.status_code == requests.codes.ACCEPTED:
+                    yield AirbyteMessage(
+                        type=MessageType.LOG,
+                        log=AirbyteLogMessage(
+                            level=Level.INFO,
+                            message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
+                        ),
+                    )

-            # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
-            # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
-            # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
-            partition_obj = stream_slice.get("partition")
-            if self.cursor and partition_obj:
-                self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
+                # In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
+                # partition after we give up the maximum number of retries on the 202 response. This does lead to the question
+                # of if we should prematurely exit in the first place, but for now we're going to aim for feature parity
+                partition_obj = stream_slice.get("partition")
+                if self.cursor and partition_obj:
+                    self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
             else:
                 raise e

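Note: GitHub's repository-statistics endpoints respond `202 Accepted` while results are being computed, which is why this stream can exhaust its retries without ever receiving data. A standalone sketch of that documented polling contract (the retry count and delay are illustrative, not the connector's values):

    import time

    import requests

    def fetch_contributor_stats(repo: str, token: str, attempts: int = 5, delay: float = 2.0):
        """Poll /stats/contributors, which returns 202 while GitHub computes the stats."""
        url = f"https://api.github.com/repos/{repo}/stats/contributors"
        headers = {"Authorization": f"token {token}"}
        for _ in range(attempts):
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code == requests.codes.ACCEPTED:
                time.sleep(delay)  # stats cache still warming up; try again
                continue
            response.raise_for_status()
            return response.json()
        return None  # give up quietly, mirroring the stream's log-and-close behavior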
source_github/utils.py CHANGED
@@ -4,16 +4,17 @@

 import time
 from dataclasses import dataclass
+from datetime import timedelta
 from itertools import cycle
 from typing import Any, List, Mapping

-import pendulum
 import requests

 from airbyte_cdk.models import SyncMode
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
 from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
+from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse


 def getter(D: dict, key_or_keys, strict=True):
@@ -43,8 +44,8 @@ class GitHubAPILimitException(Exception):
 class Token:
     count_rest: int = 5000
     count_graphql: int = 5000
-    reset_at_rest: pendulum.DateTime = pendulum.now()
-    reset_at_graphql: pendulum.DateTime = pendulum.now()
+    reset_at_rest: AirbyteDateTime = ab_datetime_now()
+    reset_at_graphql: AirbyteDateTime = ab_datetime_now()


 class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
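Note: as with the old `pendulum.now()` defaults, `ab_datetime_now()` here is evaluated once, when the class body executes, so every `Token` created afterwards shares that single timestamp until it is overwritten. If fresh per-instance values were wanted, the usual dataclass idiom is `default_factory`; a sketch (whether the shared default is intentional isn't visible in this diff):

    from dataclasses import dataclass, field
    from datetime import datetime, timezone

    @dataclass
    class TokenSketch:
        count_rest: int = 5000
        # default_factory re-runs at each instantiation, unlike a bare class-level default
        reset_at_rest: datetime = field(default_factory=lambda: datetime.now(timezone.utc))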
@@ -55,7 +56,7 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
     the first token becomes available again.
     """

-    DURATION = pendulum.duration(seconds=3600)  # Duration at which the current rate limit window resets
+    DURATION = timedelta(seconds=3600)  # Duration at which the current rate limit window resets

     def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"):
         self._auth_method = auth_method
@@ -125,13 +126,13 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
         remaining_info_core = rate_limit_info.get("core")
         token_info.count_rest, token_info.reset_at_rest = (
             remaining_info_core.get("remaining"),
-            pendulum.from_timestamp(remaining_info_core.get("reset")),
+            ab_datetime_parse(remaining_info_core.get("reset")),
         )

         remaining_info_graphql = rate_limit_info.get("graphql")
         token_info.count_graphql, token_info.reset_at_graphql = (
             remaining_info_graphql.get("remaining"),
-            pendulum.from_timestamp(remaining_info_graphql.get("reset")),
+            ab_datetime_parse(remaining_info_graphql.get("reset")),
         )

     def check_all_tokens(self):
@@ -143,7 +144,7 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
             setattr(current_token, count_attr, getattr(current_token, count_attr) - 1)
             return True
         elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()):
-            min_time_to_wait = min((getattr(x, reset_attr) - pendulum.now()).in_seconds() for x in self._tokens.values())
+            min_time_to_wait = min((getattr(x, reset_attr) - ab_datetime_now()).seconds for x in self._tokens.values())
             if min_time_to_wait < self.max_time:
                 time.sleep(min_time_to_wait if min_time_to_wait > 0 else 0)
                 self.check_all_tokens()
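Note: pendulum's `in_seconds()` returns the whole span in seconds, while `timedelta.seconds` is only the seconds component left after days are split off, and negative deltas normalize to `-1 day + <positive seconds>`. The like-for-like stdlib replacement is `total_seconds()`; a quick illustration:

    from datetime import timedelta

    d = timedelta(days=1, seconds=30)
    print(d.seconds)          # 30 -- component only; the day is not counted
    print(d.total_seconds())  # 86430.0

    neg = timedelta(seconds=-5)
    print(neg.seconds)          # 86395 -- normalized to -1 day + 86395 s
    print(neg.total_seconds())  # -5.0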