airbyte-source-github 2.1.21__tar.gz → 2.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/PKG-INFO +1 -1
  2. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/pyproject.toml +1 -1
  3. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/source.py +1 -1
  4. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/spec.json +4 -4
  5. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/streams.py +10 -6
  6. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/utils.py +89 -2
  7. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/README.md +0 -0
  8. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/__init__.py +0 -0
  9. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/backoff_strategies.py +0 -0
  10. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/config_migrations.py +0 -0
  11. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/constants.py +0 -0
  12. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/errors_handlers.py +0 -0
  13. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/github_schema.py +0 -0
  14. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/graphql.py +0 -0
  15. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/run.py +0 -0
  16. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/assignees.json +0 -0
  17. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/branches.json +0 -0
  18. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/collaborators.json +0 -0
  19. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/comments.json +0 -0
  20. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/commit_comment_reactions.json +0 -0
  21. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/commit_comments.json +0 -0
  22. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/commits.json +0 -0
  23. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/contributor_activity.json +0 -0
  24. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/deployments.json +0 -0
  25. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/events.json +0 -0
  26. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_comment_reactions.json +0 -0
  27. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_events.json +0 -0
  28. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_labels.json +0 -0
  29. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_milestones.json +0 -0
  30. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_reactions.json +0 -0
  31. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issue_timeline_events.json +0 -0
  32. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/issues.json +0 -0
  33. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/organizations.json +0 -0
  34. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/project_cards.json +0 -0
  35. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/project_columns.json +0 -0
  36. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/projects.json +0 -0
  37. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/projects_v2.json +0 -0
  38. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  39. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/pull_request_commits.json +0 -0
  40. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/pull_request_stats.json +0 -0
  41. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/pull_requests.json +0 -0
  42. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/releases.json +0 -0
  43. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/repositories.json +0 -0
  44. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/review_comments.json +0 -0
  45. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/reviews.json +0 -0
  46. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/events/comment.json +0 -0
  47. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/events/commented.json +0 -0
  48. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/events/committed.json +0 -0
  49. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/events/cross_referenced.json +0 -0
  50. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/events/reviewed.json +0 -0
  51. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/reaction.json +0 -0
  52. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/reactions.json +0 -0
  53. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/user.json +0 -0
  54. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/shared/user_graphql.json +0 -0
  55. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/stargazers.json +0 -0
  56. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/tags.json +0 -0
  57. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/team_members.json +0 -0
  58. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/team_memberships.json +0 -0
  59. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/teams.json +0 -0
  60. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/users.json +0 -0
  61. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/workflow_jobs.json +0 -0
  62. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/workflow_runs.json +0 -0
  63. {airbyte_source_github-2.1.21 → airbyte_source_github-2.1.23}/source_github/schemas/workflows.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-github
3
- Version: 2.1.21
3
+ Version: 2.1.23
4
4
  Summary: Source implementation for GitHub.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "2.1.21"
6
+ version = "2.1.23"
7
7
  name = "airbyte-source-github"
8
8
  description = "Source implementation for GitHub."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -241,7 +241,7 @@ class SourceGithub(AbstractSource):
241
241
  # This parameter is deprecated and in future will be used sane default, page_size: 10
242
242
  page_size = config.get("page_size_for_large_streams", constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM)
243
243
  access_token_type, _ = self.get_access_token(config)
244
- max_waiting_time = config.get("max_waiting_time", 10) * 60
244
+ max_waiting_time = config.get("max_waiting_time", 120) * 60
245
245
  organization_args = {
246
246
  "authenticator": authenticator,
247
247
  "organizations": organizations,
@@ -135,11 +135,11 @@
135
135
  "max_waiting_time": {
136
136
  "type": "integer",
137
137
  "title": "Max Waiting Time (in minutes)",
138
- "examples": [10, 30, 60],
139
- "default": 10,
138
+ "examples": [10, 60, 120],
139
+ "default": 120,
140
140
  "minimum": 1,
141
- "maximum": 60,
142
- "description": "Max Waiting Time for rate limit. Set higher value to wait till rate limits will be resetted to continue sync",
141
+ "maximum": 240,
142
+ "description": "Max time (in minutes) the connector will wait when all API tokens are rate-limited before failing. GitHub rate limits reset every 60 minutes, so values above 60 allow the connector to wait for a full reset cycle.",
143
143
  "order": 5
144
144
  }
145
145
  }
@@ -138,7 +138,11 @@ class GithubStreamABC(HttpStream, ABC):
138
138
  # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
139
139
  # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
140
140
  # Blocked on https://github.com/airbytehq/airbyte/issues/3514.
141
- if not hasattr(e, "_exception") or not hasattr(e._exception, "response"):
141
+ # `requests.RequestException` subclasses always expose a `response` attribute, but it
142
+ # defaults to `None` for transport-layer failures (ConnectionError, ConnectTimeout,
143
+ # ReadTimeout, SSLError, DNS failures, etc.). Treat a missing or `None` response as
144
+ # something this handler cannot classify, and let the CDK surface it.
145
+ if not hasattr(e, "_exception") or getattr(e._exception, "response", None) is None:
142
146
  raise e
143
147
  if e._exception.response.status_code == requests.codes.NOT_FOUND:
144
148
  # A lot of streams are not available for repositories owned by a user instead of an organization.
@@ -190,11 +194,11 @@ class GithubStreamABC(HttpStream, ABC):
190
194
 
191
195
  self.logger.warning(error_msg)
192
196
  except GitHubAPILimitException as e:
193
- internal_message = (
194
- f"Stream: `{self.name}`, slice: `{stream_slice}`. Limits for all provided tokens are reached, please try again later"
195
- )
196
- message = "Rate Limits for all provided tokens are reached. For more information please refer to documentation: https://docs.airbyte.com/integrations/sources/github#limitations--troubleshooting"
197
- raise AirbyteTracedException(internal_message=internal_message, message=message, failure_type=FailureType.config_error) from e
197
+ internal_message = f"Stream: `{self.name}`, slice: `{stream_slice}`. {e}"
198
+ message = "Rate limit exceeded for all configured GitHub API tokens."
199
+ raise AirbyteTracedException(
200
+ internal_message=internal_message, message=message, failure_type=FailureType.transient_error
201
+ ) from e
198
202
 
199
203
 
200
204
  class GithubStream(GithubStreamABC):
@@ -56,9 +56,16 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
56
56
  If a token exceeds the capacity limit, the system switches to another token.
57
57
  If all tokens are exhausted, the system will enter a sleep state until
58
58
  the first token becomes available again.
59
+
60
+ An API budget mechanism throttles requests proactively: when a token's
61
+ remaining quota drops below a configurable reserve, a small delay is
62
+ injected before the request so that the connector never fully exhausts
63
+ all tokens at once.
59
64
  """
60
65
 
61
66
  DURATION = timedelta(seconds=3600) # Duration at which the current rate limit window resets
67
+ BUDGET_RESERVE_FRACTION = 0.1 # Start throttling when only 10% of quota remains
68
+ BUDGET_MIN_RESERVE = 50 # Always keep at least this many calls in reserve per token
62
69
 
63
70
  def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"):
64
71
  self._logger = logging.getLogger("airbyte")
@@ -72,7 +79,8 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
72
79
  self.check_all_tokens()
73
80
  self._tokens_iter = cycle(self._tokens)
74
81
  self._active_token = next(self._tokens_iter)
75
- self._max_time = 60 * 10 # 10 minutes as default
82
+ self._max_time = 60 * 120 # 120 minutes as default (must exceed GitHub's 60-min rate limit window)
83
+ self._budget_logged = False # avoid log spam for throttle messages
76
84
 
77
85
  def _initialize_http_clients(self, tokens: List[str]) -> Mapping[str, HttpClient]:
78
86
  return {
@@ -169,15 +177,94 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator):
169
177
  def check_all_tokens(self):
170
178
  for token in self._tokens:
171
179
  self._check_token_limits(token)
180
+ self._budget_logged = False
181
+
182
+ def _get_budget_reserve(self, token: Token, count_attr: str) -> int:
183
+ """Return the minimum number of calls to keep in reserve for a token.
184
+
185
+ The reserve is the larger of ``BUDGET_MIN_RESERVE`` and
186
+ ``BUDGET_RESERVE_FRACTION`` of the token's actual remaining count.
187
+ We use the current remaining value as an approximation when the
188
+ original limit is unknown.
189
+ """
190
+ remaining = getattr(token, count_attr)
191
+ # Use 5000 (GitHub default) as the basis, but fall back to the
192
+ # remaining count if it's higher (e.g. enterprise tokens).
193
+ limit_estimate = max(5000, remaining)
194
+ return max(self.BUDGET_MIN_RESERVE, int(limit_estimate * self.BUDGET_RESERVE_FRACTION))
195
+
196
+ def _apply_budget_throttle(self, token: Token, count_attr: str, reset_attr: str) -> None:
197
+ """Optionally sleep a little to spread remaining calls over the reset window.
198
+
199
+ When the remaining count for *all* tokens is below the budget
200
+ reserve, we inject a short delay proportional to how much time
201
+ remains until the rate-limit window resets. This avoids hitting
202
+ the wall and having to do a long blocking sleep.
203
+ """
204
+ reserve = self._get_budget_reserve(token, count_attr)
205
+ remaining = getattr(token, count_attr)
206
+ if remaining > reserve:
207
+ return # plenty of headroom — no throttling needed
208
+
209
+ # Only throttle when *every* token is running low so that we don't
210
+ # slow down needlessly while other tokens still have capacity.
211
+ if not all(getattr(t, count_attr) <= self._get_budget_reserve(t, count_attr) for t in self._tokens.values()):
212
+ return
213
+
214
+ # Calculate a proportional delay: spread the remaining calls evenly
215
+ # across the time left until the earliest reset.
216
+ seconds_to_reset = max((getattr(token, reset_attr) - ab_datetime_now()).total_seconds(), 0)
217
+ total_remaining = sum(max(getattr(t, count_attr), 0) for t in self._tokens.values())
218
+ if total_remaining <= 0 or seconds_to_reset <= 0:
219
+ return
220
+
221
+ delay = seconds_to_reset / total_remaining
222
+ # Cap the delay to avoid extremely long pauses on single requests
223
+ delay = min(delay, 10.0)
224
+ if delay >= 0.1:
225
+ if not self._budget_logged:
226
+ self._logger.info(
227
+ "API budget: throttling requests (%.1fs delay). %d calls remaining across %d token(s), " "%.0fs until reset.",
228
+ delay,
229
+ total_remaining,
230
+ len(self._tokens),
231
+ seconds_to_reset,
232
+ )
233
+ self._budget_logged = True
234
+ time.sleep(delay)
235
+
236
+ HEARTBEAT_INTERVAL = 60.0 # Log every 60s during exhaustion sleep
237
+
238
+ def _sleep_with_heartbeat(self, total_seconds: float, count_attr: str) -> None:
239
+ """Sleep for *total_seconds* but log progress periodically so the
240
+ platform heartbeat stays alive and operators can see the connector
241
+ is not stuck."""
242
+ remaining = total_seconds
243
+ while remaining > 0:
244
+ chunk = min(remaining, self.HEARTBEAT_INTERVAL)
245
+ time.sleep(chunk)
246
+ remaining -= chunk
247
+ if remaining > 0:
248
+ self._logger.info(
249
+ "Rate limit exhausted (%s). Waiting for reset — %.0fs remaining.",
250
+ count_attr,
251
+ remaining,
252
+ )
172
253
 
173
254
  def process_token(self, current_token, count_attr, reset_attr):
174
255
  if getattr(current_token, count_attr) > 0:
256
+ self._apply_budget_throttle(current_token, count_attr, reset_attr)
175
257
  setattr(current_token, count_attr, getattr(current_token, count_attr) - 1)
176
258
  return True
177
259
  elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()):
178
260
  min_time_to_wait = min((getattr(x, reset_attr) - ab_datetime_now()).total_seconds() for x in self._tokens.values())
179
261
  if min_time_to_wait < self.max_time:
180
- time.sleep(min_time_to_wait if min_time_to_wait > 0 else 0)
262
+ self._logger.info(
263
+ "All tokens exhausted (%s). Sleeping %.0fs until rate limit resets.",
264
+ count_attr,
265
+ max(min_time_to_wait, 0),
266
+ )
267
+ self._sleep_with_heartbeat(max(min_time_to_wait, 0), count_attr)
181
268
  self.check_all_tokens()
182
269
  else:
183
270
  raise GitHubAPILimitException(f"Rate limits for all tokens ({count_attr}) were reached")