airbyte-source-github 1.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_source_github-1.5.7.dist-info/METADATA +144 -0
- airbyte_source_github-1.5.7.dist-info/RECORD +88 -0
- airbyte_source_github-1.5.7.dist-info/WHEEL +5 -0
- airbyte_source_github-1.5.7.dist-info/entry_points.txt +2 -0
- airbyte_source_github-1.5.7.dist-info/top_level.txt +3 -0
- integration_tests/__init__.py +0 -0
- integration_tests/abnormal_state.json +237 -0
- integration_tests/acceptance.py +16 -0
- integration_tests/configured_catalog.json +435 -0
- integration_tests/configured_catalog_full_refresh_test.json +415 -0
- integration_tests/invalid_config.json +5 -0
- integration_tests/sample_config.json +5 -0
- integration_tests/sample_state.json +137 -0
- source_github/__init__.py +27 -0
- source_github/config_migrations.py +106 -0
- source_github/constants.py +9 -0
- source_github/github_schema.py +41034 -0
- source_github/graphql.py +327 -0
- source_github/run.py +17 -0
- source_github/schemas/assignees.json +63 -0
- source_github/schemas/branches.json +48 -0
- source_github/schemas/collaborators.json +80 -0
- source_github/schemas/comments.json +104 -0
- source_github/schemas/commit_comment_reactions.json +4 -0
- source_github/schemas/commit_comments.json +53 -0
- source_github/schemas/commits.json +126 -0
- source_github/schemas/contributor_activity.json +109 -0
- source_github/schemas/deployments.json +77 -0
- source_github/schemas/events.json +63 -0
- source_github/schemas/issue_comment_reactions.json +4 -0
- source_github/schemas/issue_events.json +335 -0
- source_github/schemas/issue_labels.json +30 -0
- source_github/schemas/issue_milestones.json +61 -0
- source_github/schemas/issue_reactions.json +28 -0
- source_github/schemas/issue_timeline_events.json +1056 -0
- source_github/schemas/issues.json +281 -0
- source_github/schemas/organizations.json +197 -0
- source_github/schemas/project_cards.json +50 -0
- source_github/schemas/project_columns.json +38 -0
- source_github/schemas/projects.json +50 -0
- source_github/schemas/projects_v2.json +80 -0
- source_github/schemas/pull_request_comment_reactions.json +28 -0
- source_github/schemas/pull_request_commits.json +122 -0
- source_github/schemas/pull_request_stats.json +84 -0
- source_github/schemas/pull_requests.json +363 -0
- source_github/schemas/releases.json +126 -0
- source_github/schemas/repositories.json +313 -0
- source_github/schemas/review_comments.json +118 -0
- source_github/schemas/reviews.json +69 -0
- source_github/schemas/shared/events/comment.json +188 -0
- source_github/schemas/shared/events/commented.json +118 -0
- source_github/schemas/shared/events/committed.json +56 -0
- source_github/schemas/shared/events/cross_referenced.json +784 -0
- source_github/schemas/shared/events/reviewed.json +139 -0
- source_github/schemas/shared/reaction.json +27 -0
- source_github/schemas/shared/reactions.json +35 -0
- source_github/schemas/shared/user.json +59 -0
- source_github/schemas/shared/user_graphql.json +26 -0
- source_github/schemas/stargazers.json +19 -0
- source_github/schemas/tags.json +32 -0
- source_github/schemas/team_members.json +66 -0
- source_github/schemas/team_memberships.json +24 -0
- source_github/schemas/teams.json +50 -0
- source_github/schemas/users.json +63 -0
- source_github/schemas/workflow_jobs.json +109 -0
- source_github/schemas/workflow_runs.json +449 -0
- source_github/schemas/workflows.json +41 -0
- source_github/source.py +339 -0
- source_github/spec.json +179 -0
- source_github/streams.py +1678 -0
- source_github/utils.py +152 -0
- unit_tests/__init__.py +3 -0
- unit_tests/conftest.py +29 -0
- unit_tests/projects_v2_pull_requests_query.json +3 -0
- unit_tests/pull_request_stats_query.json +3 -0
- unit_tests/responses/contributor_activity_response.json +33 -0
- unit_tests/responses/graphql_reviews_responses.json +405 -0
- unit_tests/responses/issue_timeline_events.json +166 -0
- unit_tests/responses/issue_timeline_events_response.json +170 -0
- unit_tests/responses/projects_v2_response.json +45 -0
- unit_tests/responses/pull_request_comment_reactions.json +744 -0
- unit_tests/responses/pull_request_stats_response.json +317 -0
- unit_tests/test_migrations/test_config.json +8 -0
- unit_tests/test_migrations/test_new_config.json +8 -0
- unit_tests/test_multiple_token_authenticator.py +160 -0
- unit_tests/test_source.py +326 -0
- unit_tests/test_stream.py +1471 -0
- unit_tests/utils.py +78 -0
source_github/streams.py
ADDED
@@ -0,0 +1,1678 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import time
|
6
|
+
from abc import ABC, abstractmethod
|
7
|
+
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
|
8
|
+
from urllib import parse
|
9
|
+
|
10
|
+
import pendulum
|
11
|
+
import requests
|
12
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
|
13
|
+
from airbyte_cdk.models import Type as MessageType
|
14
|
+
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
15
|
+
from airbyte_cdk.sources.streams.http import HttpStream
|
16
|
+
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException
|
17
|
+
from requests.exceptions import HTTPError
|
18
|
+
|
19
|
+
from . import constants
|
20
|
+
from .graphql import (
|
21
|
+
CursorStorage,
|
22
|
+
QueryReactions,
|
23
|
+
get_query_issue_reactions,
|
24
|
+
get_query_projectsV2,
|
25
|
+
get_query_pull_requests,
|
26
|
+
get_query_reviews,
|
27
|
+
)
|
28
|
+
from .utils import GitHubAPILimitException, getter
|
29
|
+
|
30
|
+
|
31
|
+
class GithubStreamABC(HttpStream, ABC):
    """
    Base class for all GitHub streams.

    Provides shared behavior: page-based pagination via the `Link` response
    header, rate-limit-aware retry/backoff (REST and GraphQL), and centralized
    HTTP error handling in `read_records`.
    """

    primary_key = "id"

    # Detect streams with high API load
    large_stream = False

    # Extra query parameters merged into every request; subclasses override.
    stream_base_params = {}

    def __init__(self, api_url: str = "https://api.github.com", access_token_type: str = "", **kwargs):
        # Share the stream's retry budget with the authenticator so that token
        # rotation and backoff use the same time limit.
        # NOTE(review): `self.max_time` is presumably provided by the CDK's
        # HttpStream — confirm against airbyte_cdk.
        if kwargs.get("authenticator"):
            kwargs["authenticator"].max_time = self.max_time
        super().__init__(**kwargs)

        self.access_token_type = access_token_type
        self.api_url = api_url

    @property
    def url_base(self) -> str:
        # Configurable to support GitHub Enterprise installations.
        return self.api_url

    @property
    def availability_strategy(self) -> Optional["AvailabilityStrategy"]:
        # Availability checks are disabled; errors are handled in read_records.
        return None

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Extract the `page` number from the RFC-5988 `Link: rel="next"` header, if any."""
        links = response.links
        if "next" in links:
            next_link = links["next"]["url"]
            parsed_link = parse.urlparse(next_link)
            page = dict(parse.parse_qsl(parsed_link.query)).get("page")
            return {"page": page}

    def request_params(
        self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> MutableMapping[str, Any]:
        """Build query params: page size, current page, plus per-stream base params."""

        params = {"per_page": self.page_size}

        if next_page_token:
            params.update(next_page_token)

        params.update(self.stream_base_params)

        return params

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        # Without sending `User-Agent` header we will be getting `403 Client Error: Forbidden for url` error.
        return {"User-Agent": "PostmanRuntime/7.28.0"}

    def parse_response(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Iterable[Mapping]:
        """Yield each record of the JSON array response, enriched by `transform`."""
        for record in response.json():  # GitHub puts records in an array.
            yield self.transform(record=record, stream_slice=stream_slice)

    def should_retry(self, response: requests.Response) -> bool:
        """Retry on CDK defaults plus GitHub-specific primary/secondary rate limits."""
        if super().should_retry(response):
            return True

        retry_flag = (
            # The GitHub GraphQL API has limitations
            # https://docs.github.com/en/graphql/overview/resource-limitations
            (response.headers.get("X-RateLimit-Resource") == "graphql" and self.check_graphql_rate_limited(response.json()))
            # Rate limit HTTP headers
            # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers
            or (response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0")
            # Secondary rate limits
            # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
            or "Retry-After" in response.headers
        )
        if retry_flag:
            # Log whichever rate-limit headers are present to aid debugging.
            headers = [
                "X-RateLimit-Resource",
                "X-RateLimit-Remaining",
                "X-RateLimit-Reset",
                "X-RateLimit-Limit",
                "X-RateLimit-Used",
                "Retry-After",
            ]
            headers = ", ".join([f"{h}: {response.headers[h]}" for h in headers if h in response.headers])
            if headers:
                headers = f"HTTP headers: {headers},"

            self.logger.info(
                f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}"
            )

        return retry_flag

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        # This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
        # `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
        # we again could have 5000 per another hour.
        # Returns None implicitly when neither rate-limit header is present.

        min_backoff_time = 60.0
        retry_after = response.headers.get("Retry-After")
        if retry_after is not None:
            backoff_time_in_seconds = max(float(retry_after), min_backoff_time)
            return self.get_waiting_time(backoff_time_in_seconds)

        reset_time = response.headers.get("X-RateLimit-Reset")
        if reset_time:
            # `X-RateLimit-Reset` is an absolute epoch timestamp, hence the subtraction.
            backoff_time_in_seconds = max(float(reset_time) - time.time(), min_backoff_time)
            return self.get_waiting_time(backoff_time_in_seconds)

    def get_waiting_time(self, backoff_time_in_seconds):
        """Wait the computed time, or rotate to the next token when the wait exceeds `max_time`."""
        if backoff_time_in_seconds < self.max_time:
            return backoff_time_in_seconds
        else:
            self._session.auth.update_token()  # New token will be used in next request
            return 1

    @staticmethod
    def check_graphql_rate_limited(response_json: dict) -> bool:
        """Return True if a GraphQL response body reports a RATE_LIMITED error."""
        errors = response_json.get("errors")
        if errors:
            for error in errors:
                if error.get("type") == "RATE_LIMITED":
                    return True
        return False

    def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
        """
        Read records, downgrading well-understood HTTP errors (missing resources,
        permission issues, empty repos, ...) to warnings instead of failing the sync.
        """
        # get out the stream_slice parts for later use.
        organisation = stream_slice.get("organization", "")
        repository = stream_slice.get("repository", "")
        # Reading records while handling the errors
        try:
            yield from super().read_records(stream_slice=stream_slice, **kwargs)
        except HTTPError as e:
            # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
            # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
            # Blocked on https://github.com/airbytehq/airbyte/issues/3514.
            if e.response.status_code == requests.codes.NOT_FOUND:
                # A lot of streams are not available for repositories owned by a user instead of an organization.
                if isinstance(self, Organizations):
                    error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{organisation}`."
                elif isinstance(self, TeamMemberships):
                    error_msg = f"Syncing `{self.__class__.__name__}` stream for organization `{organisation}`, team `{stream_slice.get('team_slug')}` and user `{stream_slice.get('username')}` isn't available: User has no team membership. Skipping..."
                else:
                    error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`."
            elif e.response.status_code == requests.codes.FORBIDDEN:
                error_msg = str(e.response.json().get("message"))
                # When using the `check_connection` method, we should raise an error if we do not have access to the repository.
                if isinstance(self, Repositories):
                    raise e
                # When `403` for the stream, that has no access to the organization's teams, based on OAuth Apps Restrictions:
                # https://docs.github.com/en/organizations/restricting-access-to-your-organizations-data/enabling-oauth-app-access-restrictions-for-your-organization
                # For all `Organisation` based streams
                elif isinstance(self, Organizations) or isinstance(self, Teams) or isinstance(self, Users):
                    error_msg = (
                        f"Syncing `{self.name}` stream isn't available for organization `{organisation}`. Full error message: {error_msg}"
                    )
                # For all other `Repository` base streams
                else:
                    error_msg = (
                        f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
                    )
            elif e.response.status_code == requests.codes.UNAUTHORIZED:
                if self.access_token_type == constants.PERSONAL_ACCESS_TOKEN_TITLE:
                    error_msg = str(e.response.json().get("message"))
                    self.logger.error(f"{self.access_token_type} renewal is required: {error_msg}")
                # 401 is never recoverable here — always re-raise.
                raise e
            elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
                # Some repos don't have projects enabled and we get "410 Client Error: Gone for
                # url: https://api.github.com/repos/xyz/projects?per_page=100" error.
                error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
            elif e.response.status_code == requests.codes.CONFLICT:
                error_msg = (
                    f"Syncing `{self.name}` stream isn't available for repository "
                    f"`{stream_slice['repository']}`, it seems like this repository is empty."
                )
            elif e.response.status_code == requests.codes.SERVER_ERROR and isinstance(self, WorkflowRuns):
                error_msg = f"Syncing `{self.name}` stream isn't available for repository `{stream_slice['repository']}`."
            elif e.response.status_code == requests.codes.BAD_GATEWAY:
                error_msg = f"Stream {self.name} temporary failed. Try to re-run sync later"
            else:
                # most probably here we're facing a 500 server error and a risk to get a non-json response, so lets output response.text
                self.logger.error(f"Undefined error while reading records: {e.response.text}")
                raise e

            self.logger.warning(error_msg)
        except GitHubAPILimitException:
            # All configured tokens are exhausted; end this slice gracefully.
            self.logger.warning(
                f"Stream: `{self.name}`, slice: `{stream_slice}`. Limits for all provided tokens are reached, please try again later"
            )
class GithubStream(GithubStreamABC):
    """Base class for streams that iterate over a configured list of repositories."""

    def __init__(self, repositories: List[str], page_size_for_large_streams: int, **kwargs):
        super().__init__(**kwargs)
        self.repositories = repositories
        # GitHub pagination could be from 1 to 100.
        # This parameter is deprecated and in future will be used sane default, page_size: 10
        if self.large_stream:
            self.page_size = page_size_for_large_streams
        else:
            self.page_size = constants.DEFAULT_PAGE_SIZE

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        """Default endpoint: `repos/<repository>/<stream name>`."""
        return f"repos/{stream_slice['repository']}/{self.name}"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """One slice per configured repository."""
        yield from ({"repository": repo} for repo in self.repositories)

    def get_error_display_message(self, exception: BaseException) -> Optional[str]:
        """Return a user-facing hint when a large stream fails with 502 Bad Gateway."""
        is_502_on_large_stream = (
            isinstance(exception, DefaultBackoffException)
            and exception.response.status_code == requests.codes.BAD_GATEWAY
            and self.large_stream
            and self.page_size > 1
        )
        if is_502_on_large_stream:
            return f'Please try to decrease the "Page size for large streams" below {self.page_size}. The stream "{self.name}" is a large stream, such streams can fail with 502 for high "page_size" values.'
        return super().get_error_display_message(exception)

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Tag the record with the repository it was read from."""
        record["repository"] = stream_slice["repository"]
        return record
class SemiIncrementalMixin:
    """
    Semi incremental streams are also incremental but with one difference, they:
    - read all records;
    - output only new records.
    This means that semi incremental streams read all records (like full_refresh streams) but do filtering directly
    in the code and output only latest records (like incremental streams).
    """

    cursor_field = "updated_at"

    # This flag is used to indicate that current stream supports `sort` and `direction` request parameters and that
    # we should break processing records if possible. If `sort` is set to `updated` and `direction` is set to `desc`
    # this means that latest records will be at the beginning of the response and after we processed those latest
    # records we can just stop and not process other record. This will increase speed of each incremental stream
    # which supports those 2 request parameters. Currently only `IssueMilestones` and `PullRequests` streams are
    # supporting this.
    is_sorted = False

    def __init__(self, start_date: str = "", **kwargs):
        super().__init__(**kwargs)
        # Lower bound for emitted records when there is no per-slice state.
        self._start_date = start_date
        # Memoizes the starting point per slice; cleared at the start of each sync.
        self._starting_point_cache = {}

    @property
    def slice_keys(self):
        # Repository-scoped streams define `repositories`; otherwise the stream
        # is organization-scoped.
        if hasattr(self, "repositories"):
            return ["repository"]
        return ["organization"]

    # Deliberate aliasing: `record_slice_key` reuses the same property object,
    # so `self.record_slice_key` evaluates `slice_keys` at access time.
    record_slice_key = slice_keys

    def convert_cursor_value(self, value):
        # Hook for subclasses that need to normalize cursor values.
        return value

    @property
    def state_checkpoint_interval(self) -> Optional[int]:
        # Checkpoint per page only when reading in ascending order; otherwise
        # (None) checkpointing mid-stream could skip records.
        if self.is_sorted == "asc":
            return self.page_size

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
        """
        Return the latest state by comparing the cursor value in the latest record with the stream's most recent state
        object and returning an updated state object.
        """
        slice_value = getter(latest_record, self.record_slice_key)
        updated_state = self.convert_cursor_value(latest_record[self.cursor_field])
        stream_state_value = current_stream_state.get(slice_value, {}).get(self.cursor_field)
        if stream_state_value:
            # Records may arrive out of order; never move the state backwards.
            updated_state = max(updated_state, stream_state_value)
        current_stream_state.setdefault(slice_value, {})[self.cursor_field] = updated_state
        return current_stream_state

    def _get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
        """Compute the cursor lower bound for a slice from state and/or the configured start date."""
        if stream_state:
            state_path = [stream_slice[k] for k in self.slice_keys] + [self.cursor_field]
            stream_state_value = getter(stream_state, state_path, strict=False)
            if stream_state_value:
                if self._start_date:
                    return max(self._start_date, stream_state_value)
                return stream_state_value
        return self._start_date

    def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
        """Memoized wrapper around `_get_starting_point`, keyed by the slice values."""
        cache_key = tuple([stream_slice[k] for k in self.slice_keys])
        if cache_key not in self._starting_point_cache:
            self._starting_point_cache[cache_key] = self._get_starting_point(stream_state, stream_slice)
        return self._starting_point_cache[cache_key]

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        """Read all records but emit only those newer than the starting point."""
        start_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        for record in super().read_records(
            sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
        ):
            cursor_value = self.convert_cursor_value(record[self.cursor_field])
            if not start_point or cursor_value > start_point:
                yield record
            elif self.is_sorted == "desc" and cursor_value < start_point:
                # Server-side descending sort guarantees no newer records follow.
                break

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        # Drop memoized starting points so a new sync re-reads the latest state.
        self._starting_point_cache.clear()
        yield from super().stream_slices(**kwargs)
class IncrementalMixin(SemiIncrementalMixin):
    """Fully incremental: pushes the slice's starting point to the API via the `since` query parameter."""

    def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        params = super().request_params(stream_state=stream_state, **kwargs)
        starting_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        if starting_point:
            params["since"] = starting_point
        return params
class RepositoryStats(GithubStream):
    """
    This stream is technical and not intended for the user, we use it for checking connection with the repository.
    API docs: https://docs.github.com/en/rest/reference/repos#get-a-repository
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # This endpoint returns a single JSON object rather than an array.
        yield response.json()
class Assignees(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees
    """

    # Inherits all behavior from GithubStream; the default `path()` resolves to
    # `repos/<repository>/assignees` via the stream name.
class Branches(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches
    """

    # Branches carry no numeric id; repository + branch name is the identity.
    primary_key = ["repository", "name"]

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/branches"
class Collaborators(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators
    """

    # Inherits all behavior from GithubStream; the default `path()` resolves to
    # `repos/<repository>/collaborators` via the stream name.
class IssueLabels(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        # Endpoint name differs from the stream name, so the default path is overridden.
        repository = stream_slice["repository"]
        return f"repos/{repository}/labels"
class Organizations(GithubStreamABC):
    """
    API docs: https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations
    """

    # GitHub pagination could be from 1 to 100.
    page_size = 100

    def __init__(self, organizations: List[str], access_token_type: str = "", **kwargs):
        super().__init__(**kwargs)
        self.organizations = organizations
        self.access_token_type = access_token_type

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """One slice per configured organization."""
        yield from ({"organization": org} for org in self.organizations)

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        organization = stream_slice["organization"]
        return f"orgs/{organization}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # Single organization object per request, not an array.
        yield response.json()

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Tag the record with the organization it was read from."""
        record["organization"] = stream_slice["organization"]
        return record
class Repositories(SemiIncrementalMixin, Organizations):
    """
    API docs: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories
    """

    # Newest-updated first, enabling early break in SemiIncrementalMixin.
    is_sorted = "desc"
    stream_base_params = {
        "sort": "updated",
        "direction": "desc",
    }

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        organization = stream_slice["organization"]
        return f"orgs/{organization}/repos"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # GitHub puts records in an array.
        records = response.json()
        for record in records:
            yield self.transform(record=record, stream_slice=stream_slice)
class Tags(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags
    """

    # Tags carry no numeric id; repository + tag name is the identity.
    primary_key = ["repository", "name"]

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/tags"
class Teams(Organizations):
    """
    API docs: https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams
    """

    # Cached because dependent sub-streams re-read team records.
    use_cache = True

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        organization = stream_slice["organization"]
        return f"orgs/{organization}/teams"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # Unlike the parent class, this endpoint returns an array of records.
        records = response.json()
        for record in records:
            yield self.transform(record=record, stream_slice=stream_slice)
class Users(Organizations):
    """
    API docs: https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        organization = stream_slice["organization"]
        return f"orgs/{organization}/members"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # Unlike the parent class, this endpoint returns an array of records.
        records = response.json()
        for record in records:
            yield self.transform(record=record, stream_slice=stream_slice)
|
487
|
+
# Below are semi incremental streams
|
488
|
+
|
489
|
+
|
490
|
+
class Releases(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases
    """

    # Releases are effectively immutable, so creation time is the cursor.
    cursor_field = "created_at"

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Flatten each asset's nested `uploader` object down to `uploader_id`."""
        record = super().transform(record=record, stream_slice=stream_slice)

        for asset in record.get("assets", []):
            uploader = asset.pop("uploader", None)
            asset["uploader_id"] = uploader.get("id") if uploader else None

        return record
class Events(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events
    """

    # Events are immutable, so their creation timestamp serves as the cursor.
    cursor_field = "created_at"
class PullRequests(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests
    """

    use_cache = True
    large_stream = True

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Whether this sync started without state; drives the sort direction (see `is_sorted`).
        self._first_read = True

    def read_records(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
        """
        Decide if this a first read or not by the presence of the state object
        """
        self._first_read = not bool(stream_state)
        yield from super().read_records(stream_state=stream_state, **kwargs)

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"repos/{stream_slice['repository']}/pulls"

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Replace the nested `repo` object under `head` and `base` with a flat `repo_id`."""
        record = super().transform(record=record, stream_slice=stream_slice)

        for nested in ("head", "base"):
            entry = record.get(nested, {})
            # Bug fix: pop the repo of the *current* side (was hard-coded to "head",
            # which duplicated the head repo id into `base.repo_id` and popped
            # head's "repo" twice). `repo` may be null for deleted repositories.
            entry["repo_id"] = (entry.pop("repo", {}) or {}).get("id")

        return record

    def request_params(self, **kwargs) -> MutableMapping[str, Any]:
        base_params = super().request_params(**kwargs)
        # The very first time we read this stream we want to read ascending so we can save state in case of
        # a halfway failure. But if there is state, we read descending to allow incremental behavior.
        params = {"state": "all", "sort": "updated", "direction": self.is_sorted}

        return {**base_params, **params}

    @property
    def is_sorted(self) -> str:
        """
        Depending if there any state we read stream in ascending or descending order.
        """
        if self._first_read:
            return "asc"
        return "desc"
class CommitComments(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository
    """

    # Cached because dependent sub-streams re-read commit comment records.
    use_cache = True

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/comments"
class IssueMilestones(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones
    """

    # Newest-updated first, enabling early break in SemiIncrementalMixin.
    is_sorted = "desc"
    stream_base_params = {
        "state": "all",
        "sort": "updated",
        "direction": "desc",
    }

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/milestones"
class Stargazers(SemiIncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers
    """

    primary_key = "user_id"
    cursor_field = "starred_at"

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        """Add the media type that makes GitHub include `starred_at` timestamps."""
        base_headers = super().request_headers(**kwargs)
        # We need to send below header if we want to get `starred_at` field. See docs (Alternative response with
        # star creation timestamps) - https://docs.github.com/en/rest/reference/activity#list-stargazers.
        headers = {"Accept": "application/vnd.github.v3.star+json"}

        return {**base_headers, **headers}

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """
        We need to provide the "user_id" for the primary_key attribute
        and don't remove the whole "user" block from the record.
        """
        record = super().transform(record=record, stream_slice=stream_slice)
        # Robustness fix: "user" may be missing or null (the API can return a
        # null user — TODO confirm, e.g. for deleted accounts); previously that
        # raised AttributeError. Now user_id is simply None in that case.
        record["user_id"] = (record.get("user") or {}).get("id")
        return record
|
616
|
+
|
617
|
+
|
618
|
+
class Projects(SemiIncrementalMixin, GithubStream):
    """
    Classic repository projects.

    API docs: https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects
    """

    use_cache = True
    stream_base_params = {"state": "all"}

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        """Extend the default headers with the preview media type this API requires."""
        headers = dict(super().request_headers(**kwargs))
        # Without this Accept header GitHub answers with
        # `415 Client Error: Unsupported Media Type`.
        headers["Accept"] = "application/vnd.github.inertia-preview+json"
        return headers
|
635
|
+
|
636
|
+
|
637
|
+
class IssueEvents(SemiIncrementalMixin, GithubStream):
    """
    Issue events of a repository.

    API docs: https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository
    """

    cursor_field = "created_at"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/issues/events"
|
646
|
+
|
647
|
+
|
648
|
+
# Below are incremental streams
|
649
|
+
|
650
|
+
|
651
|
+
class Comments(IncrementalMixin, GithubStream):
    """
    Issue comments of a repository.

    API docs: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository
    """

    use_cache = True
    large_stream = True
    max_retries = 7

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/issues/comments"
|
662
|
+
|
663
|
+
|
664
|
+
class Commits(IncrementalMixin, GithubStream):
    """
    API docs: https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits

    Pull commits from each branch of each repository, tracking state for each branch
    """

    primary_key = "sha"
    cursor_field = "created_at"
    slice_keys = ["repository", "branch"]

    def __init__(self, branches_to_pull: Mapping[str, List[str]], default_branches: Mapping[str, str], **kwargs):
        # branches_to_pull: repository -> list of branch names to sync.
        # default_branches: repository -> its default branch name.
        super().__init__(**kwargs)
        self.branches_to_pull = branches_to_pull
        self.default_branches = default_branches

    def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        # Deliberately skip IncrementalMixin.request_params in the MRO so the
        # `since` parameter is computed here from per-branch state instead.
        params = super(IncrementalMixin, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
        since = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        if since:
            params["since"] = since
        # `sha` selects the branch to list commits from.
        params["sha"] = stream_slice["branch"]
        return params

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        # Expand each repository slice into one slice per configured branch.
        for stream_slice in super().stream_slices(**kwargs):
            repository = stream_slice["repository"]
            for branch in self.branches_to_pull.get(repository, []):
                yield {"branch": branch, "repository": repository}

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        record = super().transform(record=record, stream_slice=stream_slice)

        # Record of the `commits` stream doesn't have an updated_at/created_at field at the top level (so we could
        # just write `record["updated_at"]` or `record["created_at"]`). Instead each record has such value in
        # `commit.author.date`. So the easiest way is to just enrich the record returned from API with top level
        # field `created_at` and use it as cursor_field.
        # Include the branch in the record
        record["created_at"] = record["commit"]["author"]["date"]
        record["branch"] = stream_slice["branch"]

        return record

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
        # State is nested per repository and per branch:
        # {repository: {branch: {cursor_field: value}}}. The mapping is updated
        # in place and returned.
        repository = latest_record["repository"]
        branch = latest_record["branch"]
        updated_state = latest_record[self.cursor_field]
        stream_state_value = current_stream_state.get(repository, {}).get(branch, {}).get(self.cursor_field)
        if stream_state_value:
            # Never move the cursor backwards.
            updated_state = max(updated_state, stream_state_value)
        current_stream_state.setdefault(repository, {}).setdefault(branch, {})[self.cursor_field] = updated_state
        return current_stream_state
|
716
|
+
|
717
|
+
|
718
|
+
class Issues(IncrementalMixin, GithubStream):
    """
    Repository issues, read in ascending order of update time.

    API docs: https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues
    """

    use_cache = True
    large_stream = True
    is_sorted = "asc"

    # Every issue (open and closed), ordered by update time, oldest first.
    stream_base_params = {"state": "all", "sort": "updated", "direction": "asc"}
|
732
|
+
|
733
|
+
|
734
|
+
class ReviewComments(IncrementalMixin, GithubStream):
    """
    Pull-request review comments of a repository.

    API docs: https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository
    """

    use_cache = True
    large_stream = True

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/pulls/comments"
|
744
|
+
|
745
|
+
|
746
|
+
class GitHubGraphQLStream(GithubStream, ABC):
    """Base class for streams backed by the GraphQL endpoint (POST requests)."""

    http_method = "POST"

    def path(
        self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        # Every GraphQL query goes to the single "graphql" endpoint.
        return "graphql"

    def should_retry(self, response: requests.Response) -> bool:
        """Retry with a halved page size on gateway errors; otherwise restore the default."""
        gateway_errors = (requests.codes.BAD_GATEWAY, requests.codes.GATEWAY_TIMEOUT)
        if response.status_code in gateway_errors:
            self.page_size = int(self.page_size / 2)
            return True
        self.page_size = constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM if self.large_stream else constants.DEFAULT_PAGE_SIZE
        # GraphQL reports failures inside an otherwise-successful response
        # under the "errors" key, so treat those as retryable too.
        return super().should_retry(response) or response.json().get("errors")

    def _get_repository_name(self, repository: Mapping[str, Any]) -> str:
        """Build "owner/name" from a GraphQL repository object."""
        return "{}/{}".format(repository["owner"]["login"], repository["name"])

    def request_params(
        self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> MutableMapping[str, Any]:
        # Everything is carried in the POST body; no query parameters needed.
        return {}
|
769
|
+
|
770
|
+
|
771
|
+
class PullRequestStats(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    API docs: https://docs.github.com/en/graphql/reference/objects#pullrequest
    """

    large_stream = True
    is_sorted = "asc"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Flatten GraphQL pull-request nodes into flat stat records."""
        repository = response.json()["data"]["repository"]
        if not repository:
            return
        repository_name = self._get_repository_name(repository)
        for record in repository["pullRequests"]["nodes"]:
            # Collapse nested count objects into plain integers.
            record["review_comments"] = sum(node["comments"]["totalCount"] for node in record["review_comments"]["nodes"])
            record["comments"] = record["comments"]["totalCount"]
            record["commits"] = record["commits"]["totalCount"]
            record["repository"] = repository_name
            merged_by = record["merged_by"]
            if merged_by:
                merged_by["type"] = merged_by.pop("__typename")
            yield record

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        repository = response.json()["data"]["repository"]
        if repository:
            page_info = repository["pullRequests"]["pageInfo"]
            if page_info["hasNextPage"]:
                return {"after": page_info["endCursor"]}
        return None

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        owner, name = stream_slice["repository"].split("/")
        after = next_page_token["after"] if next_page_token else None
        query = get_query_pull_requests(owner=owner, name=name, first=self.page_size, after=after, direction=self.is_sorted.upper())
        return {"query": query}

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        headers = dict(super().request_headers(**kwargs))
        # https://docs.github.com/en/graphql/overview/schema-previews#merge-info-preview
        headers["Accept"] = "application/vnd.github.merge-info-preview+json"
        return headers
|
818
|
+
|
819
|
+
|
820
|
+
class Reviews(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    API docs: https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request
    """

    is_sorted = False
    cursor_field = "updated_at"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Pagination bookkeeping:
        #   pull_requests_cursor: repository -> cursor of the next pullRequests page.
        #   reviews_cursors: repository -> {pull_request_number: cursor of the next reviews page}.
        self.pull_requests_cursor = {}
        self.reviews_cursors = {}

    def _get_records(self, pull_request, repository_name):
        """Yield review records of `pull_request`, reshaped to the REST-style schema."""
        for record in pull_request["reviews"]["nodes"]:
            record["repository"] = repository_name
            record["pull_request_url"] = pull_request["url"]
            if record["commit"]:
                record["commit_id"] = record.pop("commit")["oid"]
            if record["user"]:
                record["user"]["type"] = record["user"].pop("__typename")
            # for backward compatibility with REST API response
            record["_links"] = {
                "html": {"href": record["html_url"]},
                "pull_request": {"href": record["pull_request_url"]},
            }
            yield record

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # The response is either a page of pull requests ("pullRequests") or a
        # single pull request ("pullRequest") when draining its remaining reviews.
        repository = response.json()["data"]["repository"]
        if repository:
            repository_name = self._get_repository_name(repository)
            if "pullRequests" in repository:
                for pull_request in repository["pullRequests"]["nodes"]:
                    yield from self._get_records(pull_request, repository_name)
            elif "pullRequest" in repository:
                yield from self._get_records(repository["pullRequest"], repository_name)

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        # Record every outstanding cursor found on this page, then request the
        # next page: per-pull-request review pages are drained first (one at a
        # time via popitem), then the next pullRequests page.
        repository = response.json()["data"]["repository"]
        if repository:
            repository_name = self._get_repository_name(repository)
            reviews_cursors = self.reviews_cursors.setdefault(repository_name, {})
            if "pullRequests" in repository:
                if repository["pullRequests"]["pageInfo"]["hasNextPage"]:
                    self.pull_requests_cursor[repository_name] = repository["pullRequests"]["pageInfo"]["endCursor"]
                for pull_request in repository["pullRequests"]["nodes"]:
                    if pull_request["reviews"]["pageInfo"]["hasNextPage"]:
                        pull_request_number = pull_request["number"]
                        reviews_cursors[pull_request_number] = pull_request["reviews"]["pageInfo"]["endCursor"]
            elif "pullRequest" in repository:
                if repository["pullRequest"]["reviews"]["pageInfo"]["hasNextPage"]:
                    pull_request_number = repository["pullRequest"]["number"]
                    reviews_cursors[pull_request_number] = repository["pullRequest"]["reviews"]["pageInfo"]["endCursor"]
            if reviews_cursors:
                number, after = reviews_cursors.popitem()
                return {"after": after, "number": number}
            if repository_name in self.pull_requests_cursor:
                return {"after": self.pull_requests_cursor.pop(repository_name)}

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        # `next_page_token` carries the pagination arguments ("after", and
        # optionally "number" for a single pull request) straight into the query.
        organization, name = stream_slice["repository"].split("/")
        if not next_page_token:
            next_page_token = {"after": None}
        query = get_query_reviews(owner=organization, name=name, first=self.page_size, **next_page_token)
        return {"query": query}
|
892
|
+
|
893
|
+
|
894
|
+
class PullRequestCommits(GithubStream):
    """
    Commits belonging to each pull request of a repository.

    API docs: https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request
    """

    primary_key = "sha"

    def __init__(self, parent: HttpStream, **kwargs):
        """`parent` is the stream whose records define the slices (one per pull request)."""
        super().__init__(**kwargs)
        self.parent = parent

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        pull_number = stream_slice["pull_number"]
        return f"repos/{repository}/pulls/{pull_number}/commits"

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """Emit one slice per pull request produced by the parent stream."""
        for parent_slice in self.parent.stream_slices(
            sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
        ):
            pull_requests = self.parent.read_records(
                sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=parent_slice, stream_state=stream_state
            )
            for pull_request in pull_requests:
                yield {"repository": pull_request["repository"], "pull_number": pull_request["number"]}

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Attach the pull request number so each commit can be tied back to its PR."""
        record = super().transform(record=record, stream_slice=stream_slice)
        record["pull_number"] = stream_slice["pull_number"]
        return record
|
925
|
+
|
926
|
+
|
927
|
+
class ProjectsV2(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    Repository ProjectsV2 objects fetched over GraphQL.

    API docs: https://docs.github.com/en/graphql/reference/objects#projectv2
    """

    is_sorted = "asc"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        repository = response.json()["data"]["repository"]
        if not repository:
            return
        repository_name = self._get_repository_name(repository)
        for record in repository["projectsV2"]["nodes"]:
            # Replace the nested owner object with its bare id.
            record["owner_id"] = record.pop("owner").get("id")
            record["repository"] = repository_name
            yield record

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        repository = response.json()["data"]["repository"]
        if repository:
            page_info = repository["projectsV2"]["pageInfo"]
            if page_info["hasNextPage"]:
                return {"after": page_info["endCursor"]}
        return None

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        owner, name = stream_slice["repository"].split("/")
        after = next_page_token["after"] if next_page_token else None
        query = get_query_projectsV2(owner=owner, name=name, first=self.page_size, after=after, direction=self.is_sorted.upper())
        return {"query": query}
|
963
|
+
|
964
|
+
|
965
|
+
# Reactions streams
|
966
|
+
|
967
|
+
|
968
|
+
class ReactionStream(GithubStream, ABC):
    """
    Base for reaction streams: reads the reactions attached to each record of a
    parent stream, keeping per-repository/per-parent incremental state on
    `created_at`.
    """

    parent_key = "id"  # field on the parent record holding its id
    copy_parent_key = "comment_id"  # name under which that id is copied onto reactions
    cursor_field = "created_at"

    def __init__(self, start_date: str = "", **kwargs):
        super().__init__(**kwargs)
        # The parent stream is constructed with the same kwargs plus the start date.
        kwargs["start_date"] = start_date
        self._parent_stream = self.parent_entity(**kwargs)
        self._start_date = start_date

    @property
    @abstractmethod
    def parent_entity(self):
        """
        Specify the class of the parent stream for which receive reactions
        """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        parent_path = self._parent_stream.path(stream_slice=stream_slice, **kwargs)
        return f"{parent_path}/{stream_slice[self.copy_parent_key]}/reactions"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Emit one slice per parent record (full refresh of the parent)."""
        for parent_slice in super().stream_slices(**kwargs):
            parent_records = self._parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=parent_slice)
            for parent_record in parent_records:
                yield {self.copy_parent_key: parent_record[self.parent_key], "repository": parent_slice["repository"]}

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
        """Fold the latest record's cursor into the nested {repo: {parent_id: {cursor}}} state."""
        repository = latest_record["repository"]
        parent_id = str(latest_record[self.copy_parent_key])
        latest_value = latest_record[self.cursor_field]
        known_value = current_stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
        if known_value:
            # Never move the cursor backwards.
            latest_value = max(latest_value, known_value)
        current_stream_state.setdefault(repository, {}).setdefault(parent_id, {})[self.cursor_field] = latest_value
        return current_stream_state

    def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
        """Cursor value to resume from: saved state (never earlier than the start date), else the start date."""
        if stream_state:
            repository = stream_slice["repository"]
            parent_id = str(stream_slice[self.copy_parent_key])
            state_value = stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
            if state_value:
                return max(self._start_date, state_value) if self._start_date else state_value
        return self._start_date

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        """Yield only records strictly newer than the starting point."""
        cutoff = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        records = super().read_records(
            sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
        )
        for record in records:
            if not cutoff or record[self.cursor_field] > cutoff:
                yield record

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """Copy the parent id from the slice onto the reaction record."""
        record = super().transform(record, stream_slice)
        record[self.copy_parent_key] = stream_slice[self.copy_parent_key]
        return record
|
1035
|
+
|
1036
|
+
|
1037
|
+
class CommitCommentReactions(ReactionStream):
    """
    Reactions attached to commit comments.

    API docs: https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment
    """

    # Reactions are fetched for each record of the CommitComments stream.
    parent_entity = CommitComments
|
1043
|
+
|
1044
|
+
|
1045
|
+
class IssueCommentReactions(ReactionStream):
    """
    Reactions attached to issue comments.

    API docs: https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment
    """

    # Reactions are fetched for each record of the Comments stream.
    parent_entity = Comments
|
1051
|
+
|
1052
|
+
|
1053
|
+
class IssueReactions(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    https://docs.github.com/en/graphql/reference/objects#issue
    https://docs.github.com/en/graphql/reference/objects#reaction
    """

    cursor_field = "created_at"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Pagination bookkeeping:
        #   issues_cursor: repository -> cursor of the next issues page.
        #   reactions_cursors: repository -> {issue_number: cursor of the next reactions page}.
        self.issues_cursor = {}
        self.reactions_cursors = {}

    def _get_reactions_from_issue(self, issue, repository_name):
        """Yield the reactions of `issue`, enriched with repository and issue number."""
        for reaction in issue["reactions"]["nodes"]:
            reaction["repository"] = repository_name
            reaction["issue_number"] = issue["number"]
            reaction["user"]["type"] = "User"
            yield reaction

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # The response is either a page of issues ("issues") or a single issue
        # ("issue") when draining its remaining reactions.
        repository = response.json()["data"]["repository"]
        if repository:
            repository_name = self._get_repository_name(repository)
            if "issues" in repository:
                for issue in repository["issues"]["nodes"]:
                    yield from self._get_reactions_from_issue(issue, repository_name)
            elif "issue" in repository:
                yield from self._get_reactions_from_issue(repository["issue"], repository_name)

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        # Record every outstanding cursor from this page, then request the next
        # page: per-issue reaction pages are drained first (one at a time via
        # popitem), then the next issues page.
        repository = response.json()["data"]["repository"]
        if repository:
            repository_name = self._get_repository_name(repository)
            reactions_cursors = self.reactions_cursors.setdefault(repository_name, {})
            if "issues" in repository:
                if repository["issues"]["pageInfo"]["hasNextPage"]:
                    self.issues_cursor[repository_name] = repository["issues"]["pageInfo"]["endCursor"]
                for issue in repository["issues"]["nodes"]:
                    if issue["reactions"]["pageInfo"]["hasNextPage"]:
                        issue_number = issue["number"]
                        reactions_cursors[issue_number] = issue["reactions"]["pageInfo"]["endCursor"]
            elif "issue" in repository:
                if repository["issue"]["reactions"]["pageInfo"]["hasNextPage"]:
                    issue_number = repository["issue"]["number"]
                    reactions_cursors[issue_number] = repository["issue"]["reactions"]["pageInfo"]["endCursor"]
            if reactions_cursors:
                number, after = reactions_cursors.popitem()
                return {"after": after, "number": number}
            if repository_name in self.issues_cursor:
                return {"after": self.issues_cursor.pop(repository_name)}

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        # `next_page_token` carries the pagination arguments ("after", and
        # optionally "number" for a single issue) straight into the query.
        organization, name = stream_slice["repository"].split("/")
        if not next_page_token:
            next_page_token = {"after": None}
        query = get_query_issue_reactions(owner=organization, name=name, first=self.page_size, **next_page_token)
        return {"query": query}
|
1116
|
+
|
1117
|
+
|
1118
|
+
class PullRequestCommentReactions(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    API docs:
    https://docs.github.com/en/graphql/reference/objects#pullrequestreviewcomment
    https://docs.github.com/en/graphql/reference/objects#reaction
    """

    cursor_field = "created_at"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Outstanding pagination cursors for the four nesting levels; drained in
        # the declared order by get_cursor().
        self.cursor_storage = CursorStorage(["PullRequest", "PullRequestReview", "PullRequestReviewComment", "Reaction"])
        self.query_reactions = QueryReactions()

    def _get_reactions_from_comment(self, comment, repository):
        # Enrich each reaction with its repository and owning comment id.
        for reaction in comment["reactions"]["nodes"]:
            reaction["repository"] = self._get_repository_name(repository)
            reaction["comment_id"] = comment["id"]
            if reaction["user"]:
                reaction["user"]["type"] = "User"
            yield reaction

    def _get_reactions_from_review(self, review, repository):
        for comment in review["comments"]["nodes"]:
            yield from self._get_reactions_from_comment(comment, repository)

    def _get_reactions_from_pull_request(self, pull_request, repository):
        for review in pull_request["reviews"]["nodes"]:
            yield from self._get_reactions_from_review(review, repository)

    def _get_reactions_from_repository(self, repository):
        for pull_request in repository["pullRequests"]["nodes"]:
            yield from self._get_reactions_from_pull_request(pull_request, repository)

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # Top-level queries return "repository"; continuation queries (rooted at
        # a specific node id) return "node" with a __typename discriminator.
        data = response.json()["data"]
        repository = data.get("repository")
        if repository:
            yield from self._get_reactions_from_repository(repository)

        node = data.get("node")
        if node:
            if node["__typename"] == "PullRequest":
                yield from self._get_reactions_from_pull_request(node, node["repository"])
            elif node["__typename"] == "PullRequestReview":
                yield from self._get_reactions_from_review(node, node["repository"])
            elif node["__typename"] == "PullRequestReviewComment":
                yield from self._get_reactions_from_comment(node, node["repository"])

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        # Walk the whole nested payload, registering every "hasNextPage" cursor
        # found at any level, then ask the storage for the next one to fetch.
        data = response.json()["data"]
        repository = data.get("repository")
        if repository:
            self._add_cursor(repository, "pullRequests")
            for pull_request in repository["pullRequests"]["nodes"]:
                self._add_cursor(pull_request, "reviews")
                for review in pull_request["reviews"]["nodes"]:
                    self._add_cursor(review, "comments")
                    for comment in review["comments"]["nodes"]:
                        self._add_cursor(comment, "reactions")

        node = data.get("node")
        if node:
            if node["__typename"] == "PullRequest":
                self._add_cursor(node, "reviews")
                for review in node["reviews"]["nodes"]:
                    self._add_cursor(review, "comments")
                    for comment in review["comments"]["nodes"]:
                        self._add_cursor(comment, "reactions")
            elif node["__typename"] == "PullRequestReview":
                self._add_cursor(node, "comments")
                for comment in node["comments"]["nodes"]:
                    self._add_cursor(comment, "reactions")
            elif node["__typename"] == "PullRequestReviewComment":
                self._add_cursor(node, "reactions")

        return self.cursor_storage.get_cursor()

    def _add_cursor(self, node, link):
        # Map a connection field name to the typename of the objects it pages.
        link_to_object = {
            "reactions": "Reaction",
            "comments": "PullRequestReviewComment",
            "reviews": "PullRequestReview",
            "pullRequests": "PullRequest",
        }

        pageInfo = node[link]["pageInfo"]
        if pageInfo["hasNextPage"]:
            # NOTE(review): GraphQL nodes in this payload expose "id" (see
            # _get_reactions_from_comment); confirm "node_id" is actually
            # present here, otherwise parent_id is always None.
            self.cursor_storage.add_cursor(
                link_to_object[link], pageInfo["endCursor"], node[link]["totalCount"], parent_id=node.get("node_id")
            )

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        # Choose the query root matching the typename of the pending cursor;
        # without a token, start a fresh top-level repository query.
        organization, name = stream_slice["repository"].split("/")
        if next_page_token:
            after = next_page_token["cursor"]
            # Don't request more items than the connection actually has left.
            page_size = min(self.page_size, next_page_token["total_count"])
            if next_page_token["typename"] == "PullRequest":
                query = self.query_reactions.get_query_root_repository(owner=organization, name=name, first=page_size, after=after)
            elif next_page_token["typename"] == "PullRequestReview":
                query = self.query_reactions.get_query_root_pull_request(node_id=next_page_token["parent_id"], first=page_size, after=after)
            elif next_page_token["typename"] == "PullRequestReviewComment":
                query = self.query_reactions.get_query_root_review(node_id=next_page_token["parent_id"], first=page_size, after=after)
            elif next_page_token["typename"] == "Reaction":
                query = self.query_reactions.get_query_root_comment(node_id=next_page_token["parent_id"], first=page_size, after=after)
        else:
            query = self.query_reactions.get_query_root_repository(owner=organization, name=name, first=self.page_size)

        return {"query": query}
|
1232
|
+
|
1233
|
+
|
1234
|
+
class Deployments(SemiIncrementalMixin, GithubStream):
    """
    Deployments of a repository.

    API docs: https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        repository = stream_slice["repository"]
        return f"repos/{repository}/deployments"
|
1241
|
+
|
1242
|
+
|
1243
|
+
class ProjectColumns(GithubStream):
|
1244
|
+
"""
|
1245
|
+
API docs: https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns
|
1246
|
+
"""
|
1247
|
+
|
1248
|
+
use_cache = True
|
1249
|
+
cursor_field = "updated_at"
|
1250
|
+
|
1251
|
+
def __init__(self, parent: HttpStream, start_date: str, **kwargs):
    """Keep a handle to the parent stream and the configured start date."""
    super().__init__(**kwargs)
    self._start_date = start_date
    self.parent = parent
|
1255
|
+
|
1256
|
+
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
    """Columns endpoint of the project identified by the current slice."""
    project_id = stream_slice["project_id"]
    return f"projects/{project_id}/columns"
|
1258
|
+
|
1259
|
+
def stream_slices(
|
1260
|
+
self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
|
1261
|
+
) -> Iterable[Optional[Mapping[str, Any]]]:
|
1262
|
+
parent_stream_slices = self.parent.stream_slices(
|
1263
|
+
sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
|
1264
|
+
)
|
1265
|
+
for stream_slice in parent_stream_slices:
|
1266
|
+
parent_records = self.parent.read_records(
|
1267
|
+
sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
1268
|
+
)
|
1269
|
+
for record in parent_records:
|
1270
|
+
yield {"repository": record["repository"], "project_id": record["id"]}
|
1271
|
+
|
1272
|
+
def read_records(
|
1273
|
+
self,
|
1274
|
+
sync_mode: SyncMode,
|
1275
|
+
cursor_field: List[str] = None,
|
1276
|
+
stream_slice: Mapping[str, Any] = None,
|
1277
|
+
stream_state: Mapping[str, Any] = None,
|
1278
|
+
) -> Iterable[Mapping[str, Any]]:
|
1279
|
+
starting_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
|
1280
|
+
for record in super().read_records(
|
1281
|
+
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
1282
|
+
):
|
1283
|
+
if not starting_point or record[self.cursor_field] > starting_point:
|
1284
|
+
yield record
|
1285
|
+
|
1286
|
+
def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
|
1287
|
+
if stream_state:
|
1288
|
+
repository = stream_slice["repository"]
|
1289
|
+
project_id = str(stream_slice["project_id"])
|
1290
|
+
stream_state_value = stream_state.get(repository, {}).get(project_id, {}).get(self.cursor_field)
|
1291
|
+
if stream_state_value:
|
1292
|
+
if self._start_date:
|
1293
|
+
return max(self._start_date, stream_state_value)
|
1294
|
+
return stream_state_value
|
1295
|
+
return self._start_date
|
1296
|
+
|
1297
|
+
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
|
1298
|
+
repository = latest_record["repository"]
|
1299
|
+
project_id = str(latest_record["project_id"])
|
1300
|
+
updated_state = latest_record[self.cursor_field]
|
1301
|
+
stream_state_value = current_stream_state.get(repository, {}).get(project_id, {}).get(self.cursor_field)
|
1302
|
+
if stream_state_value:
|
1303
|
+
updated_state = max(updated_state, stream_state_value)
|
1304
|
+
current_stream_state.setdefault(repository, {}).setdefault(project_id, {})[self.cursor_field] = updated_state
|
1305
|
+
return current_stream_state
|
1306
|
+
|
1307
|
+
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
|
1308
|
+
record = super().transform(record=record, stream_slice=stream_slice)
|
1309
|
+
record["project_id"] = stream_slice["project_id"]
|
1310
|
+
return record
|
1311
|
+
|
1312
|
+
|
1313
|
+
class ProjectCards(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards
    """

    cursor_field = "updated_at"
    stream_base_params = {"archived_state": "all"}

    def __init__(self, parent: HttpStream, start_date: str, **kwargs):
        super().__init__(**kwargs)
        self.parent = parent
        self._start_date = start_date

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"projects/columns/{stream_slice['column_id']}/cards"

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        # One slice per project column discovered through the parent (ProjectColumns) stream.
        for parent_slice in self.parent.stream_slices(
            sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
        ):
            columns = self.parent.read_records(
                sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=parent_slice, stream_state=stream_state
            )
            for column in columns:
                yield {"repository": column["repository"], "project_id": column["project_id"], "column_id": column["id"]}

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        # Semi-incremental filtering: drop records at or before the per-column starting point.
        lower_bound = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        for record in super().read_records(
            sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
        ):
            if lower_bound and record[self.cursor_field] <= lower_bound:
                continue
            yield record

    def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
        # State is nested repository -> project id -> column id (ids stringified).
        if not stream_state:
            return self._start_date
        repository = stream_slice["repository"]
        project_id = str(stream_slice["project_id"])
        column_id = str(stream_slice["column_id"])
        state_value = stream_state.get(repository, {}).get(project_id, {}).get(column_id, {}).get(self.cursor_field)
        if not state_value:
            return self._start_date
        return max(self._start_date, state_value) if self._start_date else state_value

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
        # Advance the per-column cursor monotonically; never move it backwards.
        repository = latest_record["repository"]
        project_id = str(latest_record["project_id"])
        column_id = str(latest_record["column_id"])
        candidate = latest_record[self.cursor_field]
        known = current_stream_state.get(repository, {}).get(project_id, {}).get(column_id, {}).get(self.cursor_field)
        if known:
            candidate = max(candidate, known)
        current_stream_state.setdefault(repository, {}).setdefault(project_id, {}).setdefault(column_id, {})[
            self.cursor_field
        ] = candidate
        return current_stream_state

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        # Attach the owning project and column ids from the slice after the base transformation.
        transformed = super().transform(record=record, stream_slice=stream_slice)
        transformed["project_id"] = stream_slice["project_id"]
        transformed["column_id"] = stream_slice["column_id"]
        return transformed
|
1386
|
+
|
1387
|
+
|
1388
|
+
class Workflows(SemiIncrementalMixin, GithubStream):
    """
    Get all workflows of a GitHub repository
    API documentation: https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"repos/{stream_slice['repository']}/actions/workflows"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # Workflow records are nested under the "workflows" key. Default to an empty list:
        # `.get("workflows")` alone returns None for a payload without that key, which would
        # raise a TypeError when iterated.
        response = response.json().get("workflows", [])
        for record in response:
            yield self.transform(record=record, stream_slice=stream_slice)

    def convert_cursor_value(self, value):
        # Normalize cursor timestamps to UTC ISO-8601 with a literal "Z" suffix so string
        # comparisons against previously saved state values are consistent.
        return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
|
1404
|
+
|
1405
|
+
|
1406
|
+
class WorkflowRuns(SemiIncrementalMixin, GithubStream):
    """
    Get all workflow runs for a GitHub repository
    API documentation: https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository
    """

    # key for accessing slice value from record
    record_slice_key = ["repository", "full_name"]

    # https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs
    re_run_period = 32  # days

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"repos/{stream_slice['repository']}/actions/runs"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
        # Run records are nested under the "workflow_runs" key. Default to an empty list:
        # `.get("workflow_runs")` alone returns None for a payload without that key, which
        # would raise a TypeError when iterated.
        response = response.json().get("workflow_runs", [])
        for record in response:
            yield record

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        # Records in the workflows_runs stream are naturally descending sorted by `created_at` field.
        # On first sight this is not big deal because cursor_field is `updated_at`.
        # But we still can use `created_at` as a breakpoint because after 30 days period
        # https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs
        # workflows_runs records cannot be updated. It means if we initially fully synced stream on subsequent incremental sync we need
        # only to look behind on 30 days to find all records which were updated.
        start_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
        break_point = None
        if start_point:
            break_point = (pendulum.parse(start_point) - pendulum.duration(days=self.re_run_period)).to_iso8601_string()
        # Deliberately bypass SemiIncrementalMixin.read_records (note the explicit
        # super(SemiIncrementalMixin, self)) — the filtering is done here so we can also
        # stop paging early once records fall outside the re-run window.
        for record in super(SemiIncrementalMixin, self).read_records(
            sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
        ):
            cursor_value = record[self.cursor_field]
            created_at = record["created_at"]
            if not start_point or cursor_value > start_point:
                yield record
            # Records are created_at-descending, so everything past this point is too old
            # to have been re-run; stop reading further pages.
            if break_point and created_at < break_point:
                break
|
1452
|
+
|
1453
|
+
|
1454
|
+
class WorkflowJobs(SemiIncrementalMixin, GithubStream):
    """
    Get all workflow jobs for a workflow run
    API documentation: https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run
    """

    cursor_field = "completed_at"

    def __init__(self, parent: WorkflowRuns, **kwargs):
        super().__init__(**kwargs)
        # Parent stream that supplies the workflow runs whose jobs are fetched.
        self.parent = parent

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"repos/{stream_slice['repository']}/actions/runs/{stream_slice['run_id']}/jobs"

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        # Translate this stream's per-repository state (keyed by our `completed_at` cursor)
        # into the parent's state shape (keyed by the parent's cursor field) so the parent
        # only yields runs that may contain new jobs.
        parent_stream_state = None
        if stream_state is not None:
            parent_stream_state = {repository: {self.parent.cursor_field: v[self.cursor_field]} for repository, v in stream_state.items()}
        parent_stream_slices = self.parent.stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=parent_stream_state)
        for stream_slice in parent_stream_slices:
            parent_records = self.parent.read_records(
                sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=parent_stream_state
            )
            for record in parent_records:
                # Reuse the parent slice in place, adding the run id the path() needs.
                stream_slice["run_id"] = record["id"]
                yield from super().read_records(
                    sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
                )

    def parse_response(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Iterable[Mapping]:
        # Only jobs that have finished (non-empty `completed_at`) are emitted; in-progress
        # jobs have no usable cursor value.
        for record in response.json()["jobs"]:
            if record.get(self.cursor_field):
                yield self.transform(record=record, stream_slice=stream_slice)

    def request_params(
        self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> MutableMapping[str, Any]:
        # `filter=all` returns every attempt of a job, not just the latest.
        params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
        params["filter"] = "all"
        return params
|
1507
|
+
|
1508
|
+
|
1509
|
+
class TeamMembers(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members
    """

    use_cache = True
    primary_key = ["id", "team_slug"]

    def __init__(self, parent: Teams, **kwargs):
        super().__init__(**kwargs)
        self.parent = parent

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orgs/{stream_slice['organization']}/teams/{stream_slice['team_slug']}/members"

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        # One slice per team discovered through the parent (Teams) stream.
        for team_slice in self.parent.stream_slices(
            sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
        ):
            teams = self.parent.read_records(
                sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=team_slice, stream_state=stream_state
            )
            for team in teams:
                yield {"organization": team["organization"], "team_slug": team["slug"]}

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        # Attach the slice context so the composite primary key (id, team_slug) is complete.
        record["organization"] = stream_slice["organization"]
        record["team_slug"] = stream_slice["team_slug"]
        return record
|
1541
|
+
|
1542
|
+
|
1543
|
+
class TeamMemberships(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user
    """

    primary_key = ["url"]

    def __init__(self, parent: TeamMembers, **kwargs):
        super().__init__(**kwargs)
        self.parent = parent

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orgs/{stream_slice['organization']}/teams/{stream_slice['team_slug']}/memberships/{stream_slice['username']}"

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        # One slice per team member discovered through the parent (TeamMembers) stream.
        for member_slice in self.parent.stream_slices(
            sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
        ):
            members = self.parent.read_records(
                sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=member_slice, stream_state=stream_state
            )
            for member in members:
                yield {"organization": member["organization"], "team_slug": member["team_slug"], "username": member["login"]}

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        # This endpoint returns a single membership object rather than a list.
        yield self.transform(response.json(), stream_slice=stream_slice)

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        # Carry the slice context into the record for traceability back to team and user.
        for key in ("organization", "team_slug", "username"):
            record[key] = stream_slice[key]
        return record
|
1578
|
+
|
1579
|
+
|
1580
|
+
class ContributorActivity(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity
    """

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"repos/{stream_slice['repository']}/stats/contributors"

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        # Pin the API version explicitly, as the statistics endpoint recommends.
        params = super().request_headers(**kwargs)
        params.update({"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"})
        return params

    def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
        record["repository"] = stream_slice["repository"]
        # Flatten the nested "author" object into the top-level record.
        record.update(record.pop("author"))
        return record

    def should_retry(self, response: requests.Response) -> bool:
        """
        If the data hasn't been cached when you query a repository's statistics, you'll receive a 202 response, need to retry to get results
        see for more info https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#a-word-about-caching
        """
        # Fix: return an explicit bool. The previous implementation fell through and
        # implicitly returned None on the no-retry path despite the `-> bool` annotation
        # (None is falsy, so this is backward compatible).
        return super().should_retry(response) or response.status_code == requests.codes.ACCEPTED

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        # GitHub computes the statistics asynchronously after a 202; give it time to finish.
        return 90 if response.status_code == requests.codes.ACCEPTED else super().backoff_time(response)

    def parse_response(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Iterable[Mapping]:
        # 204 means the repository has no contributor stats at all; log and emit nothing.
        if response.status_code == requests.codes.NO_CONTENT:
            self.logger.warning(f"Empty response received for {self.name} stats in repository {stream_slice.get('repository')}")
        else:
            yield from super().parse_response(
                response, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
            )

    def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
        repository = stream_slice.get("repository", "")
        try:
            yield from super().read_records(stream_slice=stream_slice, **kwargs)
        except HTTPError as e:
            # A 202 that survived all retries means the stats never became available for
            # this repository; surface an informational log message instead of failing.
            if e.response.status_code == requests.codes.ACCEPTED:
                yield AirbyteMessage(
                    type=MessageType.LOG,
                    log=AirbyteLogMessage(
                        level=Level.INFO,
                        message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
                    ),
                )
            else:
                raise e
|
1638
|
+
|
1639
|
+
|
1640
|
+
class IssueTimelineEvents(GithubStream):
|
1641
|
+
"""
|
1642
|
+
API docs https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue
|
1643
|
+
"""
|
1644
|
+
|
1645
|
+
primary_key = ["repository", "issue_number"]
|
1646
|
+
|
1647
|
+
def __init__(self, **kwargs):
|
1648
|
+
super().__init__(**kwargs)
|
1649
|
+
self.parent = Issues(**kwargs)
|
1650
|
+
|
1651
|
+
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
1652
|
+
return f"repos/{stream_slice['repository']}/issues/{stream_slice['number']}/timeline"
|
1653
|
+
|
1654
|
+
def stream_slices(
|
1655
|
+
self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
|
1656
|
+
) -> Iterable[Optional[Mapping[str, Any]]]:
|
1657
|
+
parent_stream_slices = self.parent.stream_slices(
|
1658
|
+
sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state
|
1659
|
+
)
|
1660
|
+
for stream_slice in parent_stream_slices:
|
1661
|
+
parent_records = self.parent.read_records(
|
1662
|
+
sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
1663
|
+
)
|
1664
|
+
for record in parent_records:
|
1665
|
+
yield {"repository": record["repository"], "number": record["number"]}
|
1666
|
+
|
1667
|
+
def parse_response(
|
1668
|
+
self,
|
1669
|
+
response: requests.Response,
|
1670
|
+
stream_state: Mapping[str, Any],
|
1671
|
+
stream_slice: Mapping[str, Any] = None,
|
1672
|
+
next_page_token: Mapping[str, Any] = None,
|
1673
|
+
) -> Iterable[Mapping]:
|
1674
|
+
events_list = response.json()
|
1675
|
+
record = {"repository": stream_slice["repository"], "issue_number": stream_slice["number"]}
|
1676
|
+
for event in events_list:
|
1677
|
+
record[event["event"]] = event
|
1678
|
+
yield record
|