airbyte-source-github 2.1.25__tar.gz → 2.1.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/PKG-INFO +1 -1
  2. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/pyproject.toml +1 -1
  3. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/errors_handlers.py +61 -5
  4. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/streams.py +53 -16
  5. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/README.md +0 -0
  6. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/__init__.py +0 -0
  7. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/backoff_strategies.py +0 -0
  8. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/config_migrations.py +0 -0
  9. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/constants.py +0 -0
  10. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/github_schema.py +0 -0
  11. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/graphql.py +0 -0
  12. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/run.py +0 -0
  13. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/assignees.json +0 -0
  14. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/branches.json +0 -0
  15. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/collaborators.json +0 -0
  16. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/comments.json +0 -0
  17. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/commit_comment_reactions.json +0 -0
  18. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/commit_comments.json +0 -0
  19. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/commits.json +0 -0
  20. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/contributor_activity.json +0 -0
  21. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/deployments.json +0 -0
  22. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/events.json +0 -0
  23. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_comment_reactions.json +0 -0
  24. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_events.json +0 -0
  25. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_labels.json +0 -0
  26. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_milestones.json +0 -0
  27. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_reactions.json +0 -0
  28. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issue_timeline_events.json +0 -0
  29. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/issues.json +0 -0
  30. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/organizations.json +0 -0
  31. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/project_cards.json +0 -0
  32. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/project_columns.json +0 -0
  33. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/projects.json +0 -0
  34. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/projects_v2.json +0 -0
  35. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  36. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/pull_request_commits.json +0 -0
  37. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/pull_request_stats.json +0 -0
  38. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/pull_requests.json +0 -0
  39. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/releases.json +0 -0
  40. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/repositories.json +0 -0
  41. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/review_comments.json +0 -0
  42. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/reviews.json +0 -0
  43. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/events/comment.json +0 -0
  44. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/events/commented.json +0 -0
  45. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/events/committed.json +0 -0
  46. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/events/cross_referenced.json +0 -0
  47. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/events/reviewed.json +0 -0
  48. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/reaction.json +0 -0
  49. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/reactions.json +0 -0
  50. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/user.json +0 -0
  51. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/shared/user_graphql.json +0 -0
  52. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/stargazers.json +0 -0
  53. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/tags.json +0 -0
  54. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/team_members.json +0 -0
  55. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/team_memberships.json +0 -0
  56. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/teams.json +0 -0
  57. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/users.json +0 -0
  58. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/workflow_jobs.json +0 -0
  59. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/workflow_runs.json +0 -0
  60. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/schemas/workflows.json +0 -0
  61. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/source.py +0 -0
  62. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/spec.json +0 -0
  63. {airbyte_source_github-2.1.25 → airbyte_source_github-2.1.27}/source_github/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-github
3
- Version: 2.1.25
3
+ Version: 2.1.27
4
4
  Summary: Source implementation for GitHub.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "2.1.25"
6
+ version = "2.1.27"
7
7
  name = "airbyte-source-github"
8
8
  description = "Source implementation for GitHub."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import logging
5
6
  from typing import Optional, Union
6
7
 
7
8
  import requests
@@ -14,6 +15,9 @@ from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping impor
14
15
  from . import constants
15
16
 
16
17
 
18
+ logger = logging.getLogger("airbyte")
19
+
20
+
17
21
  GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
18
22
  401: ErrorResolution(
19
23
  response_action=ResponseAction.RETRY,
@@ -41,25 +45,62 @@ GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
41
45
  error_message="Conflict.",
42
46
  ),
43
47
  410: ErrorResolution(
44
- response_action=ResponseAction.RETRY,
48
+ response_action=ResponseAction.FAIL,
45
49
  failure_type=FailureType.config_error,
46
- error_message="Gone. Please ensure the url is valid.",
50
+ error_message=(
51
+ "GitHub returned 410 Gone for an unexpected reason. "
52
+ "The endpoint or API version may be deprecated. "
53
+ "Verify the connector version is current and the endpoint is still supported."
54
+ ),
47
55
  ),
48
56
  }
49
57
 
50
58
 
51
59
  def is_conflict_with_empty_repository(response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> bool:
52
60
  if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.CONFLICT:
53
- response_data = response_or_exception.json()
61
+ try:
62
+ response_data = response_or_exception.json()
63
+ except ValueError:
64
+ logger.warning(
65
+ "is_conflict_with_empty_repository received non-JSON 409 response (first 50 chars: %r).",
66
+ response_or_exception.text[:50],
67
+ )
68
+ return False
54
69
  return response_data.get("message") == "Git Repository is empty."
55
70
  return False
56
71
 
57
72
 
73
+ def is_gone_with_feature_disabled(response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> bool:
74
+ if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.GONE:
75
+ try:
76
+ message = (response_or_exception.json().get("message") or "").lower()
77
+ except ValueError:
78
+ logger.warning(
79
+ "is_gone_with_feature_disabled received non-JSON 410 response (first 50 chars: %r).",
80
+ response_or_exception.text[:50],
81
+ )
82
+ return False
83
+ return "are disabled" in message or "is disabled" in message
84
+ return False
85
+
86
+
58
87
  class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
59
88
  def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa
60
89
  self.stream = stream
61
90
  super().__init__(**kwargs)
62
91
 
92
+ def _safe_json_check_graphql_rate_limited(self, response: requests.Response) -> bool:
93
+ try:
94
+ body = response.json()
95
+ except ValueError:
96
+ self._logger.warning(
97
+ "GraphQL rate-limit check received non-JSON response (HTTP %s, first 50 chars: %r).",
98
+ response.status_code,
99
+ response.text[:50],
100
+ )
101
+ return False
102
+ return self.stream.check_graphql_rate_limited(body or {})
103
+
63
104
  def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
64
105
  if isinstance(response_or_exception, requests.Response):
65
106
  retry_flag = (
@@ -67,7 +108,7 @@ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
67
108
  # https://docs.github.com/en/graphql/overview/resource-limitations
68
109
  (
69
110
  response_or_exception.headers.get("X-RateLimit-Resource") == "graphql"
70
- and self.stream.check_graphql_rate_limited(response_or_exception.json())
111
+ and self._safe_json_check_graphql_rate_limited(response_or_exception)
71
112
  )
72
113
  # Rate limit HTTP headers
73
114
  # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
@@ -115,6 +156,14 @@ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
115
156
  error_message=log_message,
116
157
  )
117
158
 
159
+ if is_gone_with_feature_disabled(response_or_exception=response_or_exception):
160
+ log_message = f"Skipping stream slice for '{response_or_exception.url}': {response_or_exception.json().get('message', 'Feature disabled')}."
161
+ return ErrorResolution(
162
+ response_action=ResponseAction.IGNORE,
163
+ failure_type=FailureType.config_error,
164
+ error_message=log_message,
165
+ )
166
+
118
167
  return super().interpret_response(response_or_exception)
119
168
 
120
169
 
@@ -140,6 +189,13 @@ class ContributorActivityErrorHandler(GithubStreamABCErrorHandler):
140
189
 
141
190
 
142
191
  class GitHubGraphQLErrorHandler(GithubStreamABCErrorHandler):
192
+ def _safe_json_get_errors(self, response: requests.Response) -> bool:
193
+ try:
194
+ body = response.json()
195
+ except ValueError:
196
+ return False
197
+ return bool((body or {}).get("errors"))
198
+
143
199
  def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
144
200
  if isinstance(response_or_exception, requests.Response):
145
201
  if response_or_exception.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT):
@@ -154,7 +210,7 @@ class GitHubGraphQLErrorHandler(GithubStreamABCErrorHandler):
154
210
  constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.stream.large_stream else constants.DEFAULT_PAGE_SIZE
155
211
  )
156
212
 
157
- if response_or_exception.json().get("errors"):
213
+ if self._safe_json_get_errors(response_or_exception):
158
214
  return ErrorResolution(
159
215
  response_action=ResponseAction.RETRY,
160
216
  failure_type=FailureType.transient_error,
@@ -34,6 +34,7 @@ from .errors_handlers import (
34
34
  GitHubGraphQLErrorHandler,
35
35
  GithubStreamABCErrorHandler,
36
36
  is_conflict_with_empty_repository,
37
+ is_gone_with_feature_disabled,
37
38
  )
38
39
  from .graphql import (
39
40
  CursorStorage,
@@ -187,13 +188,7 @@ class GithubStreamABC(HttpStream, ABC):
187
188
  f"Your Personal Access Token may need to be renewed. GitHub message: {api_message!r}"
188
189
  )
189
190
  raise e
190
- elif e._exception.response.status_code == requests.codes.GONE and isinstance(self, Projects):
191
- # Some repos don't have projects enabled and we get "410 Client Error: Gone for
192
- # url: https://api.github.com/repos/xyz/projects?per_page=100" error.
193
- error_msg = (
194
- f"GitHub Projects (classic) is disabled for repository `{stream_slice['repository']}`. "
195
- f"Skipping the `Projects` stream for this repository."
196
- )
191
+
197
192
  elif e._exception.response.status_code == requests.codes.CONFLICT:
198
193
  error_msg = (
199
194
  f"Skipping `{self.name}` for repository `{stream_slice['repository']}`: "
@@ -256,6 +251,39 @@ class GithubStream(GithubStreamABC):
256
251
 
257
252
  return record
258
253
 
254
+ def _safe_json_list(self, response: requests.Response, key: Optional[str] = None) -> Optional[list]:
255
+ """Parse JSON from `response` and return a list, or ``None`` on failure.
256
+
257
+ When `key` is provided the body is expected to be a dict and the list is
258
+ extracted via ``body[key]``. When `key` is ``None`` the body itself must
259
+ be a list. On any parse/validation failure a warning is logged and
260
+ ``None`` is returned so callers can short-circuit gracefully.
261
+ """
262
+ try:
263
+ body = response.json()
264
+ except ValueError:
265
+ self.logger.warning(
266
+ "`%s` received non-JSON response (HTTP %s, first 50 chars: %r).",
267
+ self.name,
268
+ response.status_code,
269
+ response.text[:50],
270
+ )
271
+ return None
272
+ if key is not None:
273
+ items = (body or {}).get(key)
274
+ else:
275
+ items = body
276
+ if not isinstance(items, list):
277
+ self.logger.warning(
278
+ "`%s` response has unexpected structure (HTTP %s, key=%r, got %s).",
279
+ self.name,
280
+ response.status_code,
281
+ key,
282
+ type(items).__name__,
283
+ )
284
+ return None
285
+ return items
286
+
259
287
  def parse_response(
260
288
  self,
261
289
  response: requests.Response,
@@ -263,9 +291,8 @@ class GithubStream(GithubStreamABC):
263
291
  stream_slice: Mapping[str, Any] = None,
264
292
  next_page_token: Mapping[str, Any] = None,
265
293
  ) -> Iterable[Mapping]:
266
- if is_conflict_with_empty_repository(response):
267
- # I would expect that this should be handled (skipped) by the error handler, but it seems like
268
- # ignored this error but continue to processing records. This may be fixed in latest CDK versions.
294
+ if is_conflict_with_empty_repository(response) or is_gone_with_feature_disabled(response):
295
+ # The CDK IGNORE action still calls parse_response; guard against non-array error bodies.
269
296
  return
270
297
  yield from super().parse_response(
271
298
  response=response,
@@ -1601,8 +1628,10 @@ class Workflows(SemiIncrementalMixin, GithubStream):
1601
1628
  return f"repos/{stream_slice['repository']}/actions/workflows"
1602
1629
 
1603
1630
  def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
1604
- response = response.json().get("workflows")
1605
- for record in response:
1631
+ items = self._safe_json_list(response, key="workflows")
1632
+ if items is None:
1633
+ return
1634
+ for record in items:
1606
1635
  yield self.transform(record=record, stream_slice=stream_slice)
1607
1636
 
1608
1637
  def convert_cursor_value(self, value):
@@ -1626,8 +1655,10 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
1626
1655
  return f"repos/{stream_slice['repository']}/actions/runs"
1627
1656
 
1628
1657
  def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
1629
- response = response.json().get("workflow_runs")
1630
- for record in response:
1658
+ items = self._safe_json_list(response, key="workflow_runs")
1659
+ if items is None:
1660
+ return
1661
+ for record in items:
1631
1662
  yield record
1632
1663
 
1633
1664
  def read_records(
@@ -1705,7 +1736,10 @@ class WorkflowJobs(SemiIncrementalMixin, GithubStream):
1705
1736
  stream_slice: Mapping[str, Any] = None,
1706
1737
  next_page_token: Mapping[str, Any] = None,
1707
1738
  ) -> Iterable[Mapping]:
1708
- for record in response.json()["jobs"]:
1739
+ items = self._safe_json_list(response, key="jobs")
1740
+ if items is None:
1741
+ return
1742
+ for record in items:
1709
1743
  if record.get(self.cursor_field):
1710
1744
  yield self.transform(record=record, stream_slice=stream_slice)
1711
1745
 
@@ -1891,8 +1925,11 @@ class IssueTimelineEvents(GithubStream):
1891
1925
  stream_slice: Mapping[str, Any] = None,
1892
1926
  next_page_token: Mapping[str, Any] = None,
1893
1927
  ) -> Iterable[Mapping]:
1894
- events_list = response.json()
1895
1928
  record = {"repository": stream_slice["repository"], "issue_number": stream_slice["number"]}
1929
+ events_list = self._safe_json_list(response)
1930
+ if events_list is None:
1931
+ yield record
1932
+ return
1896
1933
  for event in events_list:
1897
1934
  record[event["event"]] = event
1898
1935
  yield record