airbyte-source-github 2.1.26__tar.gz → 2.1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/PKG-INFO +1 -1
  2. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/pyproject.toml +1 -1
  3. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/errors_handlers.py +57 -6
  4. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/streams.py +71 -8
  5. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/README.md +0 -0
  6. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/__init__.py +0 -0
  7. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/backoff_strategies.py +0 -0
  8. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/config_migrations.py +0 -0
  9. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/constants.py +0 -0
  10. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/github_schema.py +0 -0
  11. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/graphql.py +0 -0
  12. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/run.py +0 -0
  13. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/assignees.json +0 -0
  14. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/branches.json +0 -0
  15. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/collaborators.json +0 -0
  16. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/comments.json +0 -0
  17. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/commit_comment_reactions.json +0 -0
  18. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/commit_comments.json +0 -0
  19. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/commits.json +0 -0
  20. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/contributor_activity.json +0 -0
  21. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/deployments.json +0 -0
  22. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/events.json +0 -0
  23. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_comment_reactions.json +0 -0
  24. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_events.json +0 -0
  25. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_labels.json +0 -0
  26. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_milestones.json +0 -0
  27. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_reactions.json +0 -0
  28. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issue_timeline_events.json +0 -0
  29. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/issues.json +0 -0
  30. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/organizations.json +0 -0
  31. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/project_cards.json +0 -0
  32. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/project_columns.json +0 -0
  33. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/projects.json +0 -0
  34. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/projects_v2.json +0 -0
  35. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  36. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/pull_request_commits.json +0 -0
  37. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/pull_request_stats.json +0 -0
  38. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/pull_requests.json +0 -0
  39. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/releases.json +0 -0
  40. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/repositories.json +0 -0
  41. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/review_comments.json +0 -0
  42. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/reviews.json +0 -0
  43. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/events/comment.json +0 -0
  44. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/events/commented.json +0 -0
  45. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/events/committed.json +0 -0
  46. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/events/cross_referenced.json +0 -0
  47. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/events/reviewed.json +0 -0
  48. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/reaction.json +0 -0
  49. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/reactions.json +0 -0
  50. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/user.json +0 -0
  51. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/shared/user_graphql.json +0 -0
  52. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/stargazers.json +0 -0
  53. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/tags.json +0 -0
  54. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/team_members.json +0 -0
  55. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/team_memberships.json +0 -0
  56. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/teams.json +0 -0
  57. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/users.json +0 -0
  58. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/workflow_jobs.json +0 -0
  59. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/workflow_runs.json +0 -0
  60. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/schemas/workflows.json +0 -0
  61. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/source.py +0 -0
  62. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/spec.json +0 -0
  63. {airbyte_source_github-2.1.26 → airbyte_source_github-2.1.28}/source_github/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-github
3
- Version: 2.1.26
3
+ Version: 2.1.28
4
4
  Summary: Source implementation for GitHub.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "2.1.26"
6
+ version = "2.1.28"
7
7
  name = "airbyte-source-github"
8
8
  description = "Source implementation for GitHub."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import logging
5
6
  from typing import Optional, Union
6
7
 
7
8
  import requests
@@ -14,6 +15,9 @@ from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping impor
14
15
  from . import constants
15
16
 
16
17
 
18
+ logger = logging.getLogger("airbyte")
19
+
20
+
17
21
  GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
18
22
  401: ErrorResolution(
19
23
  response_action=ResponseAction.RETRY,
@@ -54,7 +58,14 @@ GITHUB_DEFAULT_ERROR_MAPPING = DEFAULT_ERROR_MAPPING | {
54
58
 
55
59
  def is_conflict_with_empty_repository(response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> bool:
56
60
  if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.CONFLICT:
57
- response_data = response_or_exception.json()
61
+ try:
62
+ response_data = response_or_exception.json()
63
+ except ValueError:
64
+ logger.warning(
65
+ "is_conflict_with_empty_repository received non-JSON 409 response (first 50 chars: %r).",
66
+ response_or_exception.text[:50],
67
+ )
68
+ return False
58
69
  return response_data.get("message") == "Git Repository is empty."
59
70
  return False
60
71
 
@@ -64,6 +75,10 @@ def is_gone_with_feature_disabled(response_or_exception: Optional[Union[requests
64
75
  try:
65
76
  message = (response_or_exception.json().get("message") or "").lower()
66
77
  except ValueError:
78
+ logger.warning(
79
+ "is_gone_with_feature_disabled received non-JSON 410 response (first 50 chars: %r).",
80
+ response_or_exception.text[:50],
81
+ )
67
82
  return False
68
83
  return "are disabled" in message or "is disabled" in message
69
84
  return False
@@ -74,6 +89,18 @@ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
74
89
  self.stream = stream
75
90
  super().__init__(**kwargs)
76
91
 
92
+ def _safe_json_check_graphql_rate_limited(self, response: requests.Response) -> bool:
93
+ try:
94
+ body = response.json()
95
+ except ValueError:
96
+ self._logger.warning(
97
+ "GraphQL rate-limit check received non-JSON response (HTTP %s, first 50 chars: %r).",
98
+ response.status_code,
99
+ response.text[:50],
100
+ )
101
+ return False
102
+ return self.stream.check_graphql_rate_limited(body or {})
103
+
77
104
  def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
78
105
  if isinstance(response_or_exception, requests.Response):
79
106
  retry_flag = (
@@ -81,7 +108,7 @@ class GithubStreamABCErrorHandler(HttpStatusErrorHandler):
81
108
  # https://docs.github.com/en/graphql/overview/resource-limitations
82
109
  (
83
110
  response_or_exception.headers.get("X-RateLimit-Resource") == "graphql"
84
- and self.stream.check_graphql_rate_limited(response_or_exception.json())
111
+ and self._safe_json_check_graphql_rate_limited(response_or_exception)
85
112
  )
86
113
  # Rate limit HTTP headers
87
114
  # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
@@ -162,25 +189,49 @@ class ContributorActivityErrorHandler(GithubStreamABCErrorHandler):
162
189
 
163
190
 
164
191
  class GitHubGraphQLErrorHandler(GithubStreamABCErrorHandler):
192
+ def _safe_json_get_errors(self, response: requests.Response) -> bool:
193
+ try:
194
+ body = response.json()
195
+ except ValueError:
196
+ return False
197
+ return bool((body or {}).get("errors"))
198
+
165
199
  def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
166
200
  if isinstance(response_or_exception, requests.Response):
167
201
  if response_or_exception.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT):
168
- self.stream.page_size = int(self.stream.page_size / 2)
202
+ # Halve the page size on every 502/504 to reduce GraphQL query cost,
203
+ # but never let it drop below 1 — a page_size of 0 would request no
204
+ # records and cause infinite paging.
205
+ previous_page_size = self.stream.page_size
206
+ self.stream.page_size = max(1, int(self.stream.page_size / 2))
207
+ self._logger.info(
208
+ "GitHub GraphQL endpoint returned HTTP %s for stream `%s`; reducing GraphQL page_size from %s to %s and retrying.",
209
+ response_or_exception.status_code,
210
+ self.stream.name,
211
+ previous_page_size,
212
+ self.stream.page_size,
213
+ )
169
214
  return ErrorResolution(
170
215
  response_action=ResponseAction.RETRY,
171
216
  failure_type=FailureType.transient_error,
172
- error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
217
+ error_message=(
218
+ f"GitHub GraphQL endpoint returned HTTP {response_or_exception.status_code} "
219
+ f"for stream `{self.stream.name}`. Reducing GraphQL page size and retrying."
220
+ ),
173
221
  )
174
222
 
175
223
  self.stream.page_size = (
176
224
  constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.stream.large_stream else constants.DEFAULT_PAGE_SIZE
177
225
  )
178
226
 
179
- if response_or_exception.json().get("errors"):
227
+ if self._safe_json_get_errors(response_or_exception):
180
228
  return ErrorResolution(
181
229
  response_action=ResponseAction.RETRY,
182
230
  failure_type=FailureType.transient_error,
183
- error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
231
+ error_message=(
232
+ f"GitHub GraphQL endpoint returned errors in the response body "
233
+ f"for stream `{self.stream.name}` (HTTP {response_or_exception.status_code}). Retrying."
234
+ ),
184
235
  )
185
236
 
186
237
  return super().interpret_response(response_or_exception)
@@ -201,6 +201,12 @@ class GithubStreamABC(HttpStream, ABC):
201
201
  f"GitHub returned HTTP 502 Bad Gateway for stream `{self.name}` after exhausting retries. "
202
202
  f"This is usually transient — the next sync attempt should succeed."
203
203
  )
204
+ elif e._exception.response.status_code == requests.codes.GATEWAY_TIMEOUT:
205
+ error_msg = (
206
+ f"GitHub returned HTTP 504 Gateway Timeout for stream `{self.name}` after exhausting retries "
207
+ f"and reducing the GraphQL page size. The next sync attempt should succeed; "
208
+ f'if 504s persist, lower "Page size for large streams" in the source configuration.'
209
+ )
204
210
  else:
205
211
  self.logger.error(f"Undefined error while reading records: {e._exception.response.text}")
206
212
  raise e
@@ -232,11 +238,15 @@ class GithubStream(GithubStreamABC):
232
238
  def get_error_display_message(self, exception: BaseException) -> Optional[str]:
233
239
  if (
234
240
  isinstance(exception, DefaultBackoffException)
235
- and exception.response.status_code == requests.codes.BAD_GATEWAY
241
+ and exception.response.status_code in (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT)
236
242
  and self.large_stream
237
243
  and self.page_size > 1
238
244
  ):
239
- return f'Please try to decrease the "Page size for large streams" below {self.page_size}. The stream "{self.name}" is a large stream, such streams can fail with 502 for high "page_size" values.'
245
+ return (
246
+ f'Please try to decrease the "Page size for large streams" below {self.page_size}. '
247
+ f'The stream "{self.name}" is a large stream, such streams can fail with '
248
+ f'{exception.response.status_code} for high "page_size" values.'
249
+ )
240
250
  return super().get_error_display_message(exception)
241
251
 
242
252
  def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
@@ -251,6 +261,39 @@ class GithubStream(GithubStreamABC):
251
261
 
252
262
  return record
253
263
 
264
+ def _safe_json_list(self, response: requests.Response, key: Optional[str] = None) -> Optional[list]:
265
+ """Parse JSON from `response` and return a list, or ``None`` on failure.
266
+
267
+ When `key` is provided the body is expected to be a dict and the list is
268
+ extracted via ``body[key]``. When `key` is ``None`` the body itself must
269
+ be a list. On any parse/validation failure a warning is logged and
270
+ ``None`` is returned so callers can short-circuit gracefully.
271
+ """
272
+ try:
273
+ body = response.json()
274
+ except ValueError:
275
+ self.logger.warning(
276
+ "`%s` received non-JSON response (HTTP %s, first 50 chars: %r).",
277
+ self.name,
278
+ response.status_code,
279
+ response.text[:50],
280
+ )
281
+ return None
282
+ if key is not None:
283
+ items = (body or {}).get(key)
284
+ else:
285
+ items = body
286
+ if not isinstance(items, list):
287
+ self.logger.warning(
288
+ "`%s` response has unexpected structure (HTTP %s, key=%r, got %s).",
289
+ self.name,
290
+ response.status_code,
291
+ key,
292
+ type(items).__name__,
293
+ )
294
+ return None
295
+ return items
296
+
254
297
  def parse_response(
255
298
  self,
256
299
  response: requests.Response,
@@ -817,6 +860,16 @@ class Releases(SemiIncrementalMixin, GitHubGraphQLStream):
817
860
 
818
861
  cursor_field = "created_at"
819
862
  is_sorted = "asc"
863
+ # The Releases GraphQL query is high-cost on the server side: every node
864
+ # materializes `description` and `descriptionHTML`, which forces GitHub
865
+ # to render each release body to HTML. On repositories with long release
866
+ # notes, a page_size of 100 pushes the resolver past its internal 10s
867
+ # deadline and returns 504 Gateway Timeout (reproduced deterministically
868
+ # against nodejs/node: first=100 -> 504 in ~11s; first=10 -> 200 in ~3s).
869
+ # `releaseAssets` / `reactionGroups` / `mentions` are NOT the cost driver
870
+ # — stripping them does not fix the timeout, only lowering `first` does.
871
+ # Mark as large_stream so it picks up the smaller default page size.
872
+ large_stream = True
820
873
 
821
874
  GRAPHQL_REACTION_TO_REST = {
822
875
  "THUMBS_UP": "plus_one",
@@ -1595,8 +1648,10 @@ class Workflows(SemiIncrementalMixin, GithubStream):
1595
1648
  return f"repos/{stream_slice['repository']}/actions/workflows"
1596
1649
 
1597
1650
  def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
1598
- response = response.json().get("workflows")
1599
- for record in response:
1651
+ items = self._safe_json_list(response, key="workflows")
1652
+ if items is None:
1653
+ return
1654
+ for record in items:
1600
1655
  yield self.transform(record=record, stream_slice=stream_slice)
1601
1656
 
1602
1657
  def convert_cursor_value(self, value):
@@ -1620,8 +1675,10 @@ class WorkflowRuns(SemiIncrementalMixin, GithubStream):
1620
1675
  return f"repos/{stream_slice['repository']}/actions/runs"
1621
1676
 
1622
1677
  def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
1623
- response = response.json().get("workflow_runs")
1624
- for record in response:
1678
+ items = self._safe_json_list(response, key="workflow_runs")
1679
+ if items is None:
1680
+ return
1681
+ for record in items:
1625
1682
  yield record
1626
1683
 
1627
1684
  def read_records(
@@ -1699,7 +1756,10 @@ class WorkflowJobs(SemiIncrementalMixin, GithubStream):
1699
1756
  stream_slice: Mapping[str, Any] = None,
1700
1757
  next_page_token: Mapping[str, Any] = None,
1701
1758
  ) -> Iterable[Mapping]:
1702
- for record in response.json()["jobs"]:
1759
+ items = self._safe_json_list(response, key="jobs")
1760
+ if items is None:
1761
+ return
1762
+ for record in items:
1703
1763
  if record.get(self.cursor_field):
1704
1764
  yield self.transform(record=record, stream_slice=stream_slice)
1705
1765
 
@@ -1885,8 +1945,11 @@ class IssueTimelineEvents(GithubStream):
1885
1945
  stream_slice: Mapping[str, Any] = None,
1886
1946
  next_page_token: Mapping[str, Any] = None,
1887
1947
  ) -> Iterable[Mapping]:
1888
- events_list = response.json()
1889
1948
  record = {"repository": stream_slice["repository"], "issue_number": stream_slice["number"]}
1949
+ events_list = self._safe_json_list(response)
1950
+ if events_list is None:
1951
+ yield record
1952
+ return
1890
1953
  for event in events_list:
1891
1954
  record[event["event"]] = event
1892
1955
  yield record