airbyte-source-gitlab 3.0.0.dev202403072311__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/METADATA +3 -3
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/RECORD +7 -6
- source_gitlab/components/partition_routers.py +43 -0
- source_gitlab/manifest.yaml +613 -0
- source_gitlab/source.py +9 -224
- source_gitlab/streams.py +0 -431
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/WHEEL +0 -0
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/entry_points.txt +0 -0
source_gitlab/source.py
CHANGED
@@ -1,245 +1,30 @@
 #
-# Copyright (c)
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 #


-import
-from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
+from typing import Any, MutableMapping, Tuple

-import
-from airbyte_cdk.
-from airbyte_cdk.models import SyncMode
-from airbyte_cdk.sources import AbstractSource
-from airbyte_cdk.sources.streams import Stream
-from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import SingleUseRefreshTokenOauth2Authenticator
-from airbyte_cdk.sources.streams.http.requests_native_auth.token import TokenAuthenticator
-from airbyte_cdk.utils import AirbyteTracedException
-from requests.auth import AuthBase
-from requests.exceptions import HTTPError
+from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
+from airbyte_cdk.utils import is_cloud_environment

-from .streams import (
-    Branches,
-    Commits,
-    Deployments,
-    EpicIssues,
-    Epics,
-    GitlabStream,
-    GroupIssueBoards,
-    GroupLabels,
-    GroupMembers,
-    GroupMilestones,
-    GroupProjects,
-    Groups,
-    GroupsList,
-    IncludeDescendantGroups,
-    Issues,
-    Jobs,
-    MergeRequestCommits,
-    MergeRequests,
-    Pipelines,
-    PipelinesExtended,
-    ProjectLabels,
-    ProjectMembers,
-    ProjectMilestones,
-    Projects,
-    Releases,
-    Tags,
-    Users,
-)
 from .utils import parse_url


-class
-    def __init__(self
-        super().__init__(
-        self._created_at_name = created_at_name
-
-    def get_created_at_name(self) -> str:
-        return self._created_at_name
-
-    def get_access_token(self) -> str:
-        if self.token_has_expired():
-            new_access_token, access_token_expires_in, access_token_created_at, new_refresh_token = self.refresh_access_token()
-            new_token_expiry_date = self.get_new_token_expiry_date(access_token_expires_in, access_token_created_at)
-            self.access_token = new_access_token
-            self.set_refresh_token(new_refresh_token)
-            self.set_token_expiry_date(new_token_expiry_date)
-            emit_configuration_as_airbyte_control_message(self._connector_config)
-        return self.access_token
-
-    @staticmethod
-    def get_new_token_expiry_date(access_token_expires_in: int, access_token_created_at: int) -> pendulum.DateTime:
-        return pendulum.from_timestamp(access_token_created_at + access_token_expires_in)
-
-    def refresh_access_token(self) -> Tuple[str, int, int, str]:
-        response_json = self._get_refresh_access_token_response()
-        return (
-            response_json[self.get_access_token_name()],
-            response_json[self.get_expires_in_name()],
-            response_json[self.get_created_at_name()],
-            response_json[self.get_refresh_token_name()],
-        )
-
-
-def get_authenticator(config: MutableMapping) -> AuthBase:
-    if config["credentials"]["auth_type"] == "access_token":
-        return TokenAuthenticator(token=config["credentials"]["access_token"])
-    return SingleUseRefreshTokenGitlabOAuth2Authenticator(
-        config,
-        token_refresh_endpoint=f"https://{config['api_url']}/oauth/token",
-        refresh_token_error_status_codes=(400,),
-        refresh_token_error_key="error",
-        refresh_token_error_values="invalid_grant",
-    )
-
-
-class SourceGitlab(AbstractSource):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.__auth_params: Mapping[str, Any] = {}
-        self.__groups_stream: Optional[GitlabStream] = None
-        self.__projects_stream: Optional[GitlabStream] = None
+class SourceGitlab(YamlDeclarativeSource):
+    def __init__(self):
+        super().__init__(**{"path_to_yaml": "manifest.yaml"})

     @staticmethod
     def _ensure_default_values(config: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
         config["api_url"] = config.get("api_url") or "gitlab.com"
         return config

-    def _groups_stream(self, config: MutableMapping[str, Any]) -> Groups:
-        if not self.__groups_stream:
-            auth_params = self._auth_params(config)
-            group_ids = list(map(lambda x: x["id"], self._get_group_list(config)))
-            self.__groups_stream = Groups(group_ids=group_ids, **auth_params)
-        return self.__groups_stream
-
-    def _projects_stream(self, config: MutableMapping[str, Any]) -> Union[Projects, GroupProjects]:
-        if not self.__projects_stream:
-            auth_params = self._auth_params(config)
-            project_ids = config.get("projects_list", [])
-            groups_stream = self._groups_stream(config)
-            if groups_stream.group_ids:
-                self.__projects_stream = GroupProjects(project_ids=project_ids, parent_stream=groups_stream, **auth_params)
-                return self.__projects_stream
-            self.__projects_stream = Projects(project_ids=project_ids, **auth_params)
-            return self.__projects_stream
-
-    def _auth_params(self, config: MutableMapping[str, Any]) -> Mapping[str, Any]:
-        if not self.__auth_params:
-            auth = get_authenticator(config)
-            self.__auth_params = dict(authenticator=auth, api_url=config["api_url"])
-        return self.__auth_params
-
-    def _get_group_list(self, config: MutableMapping[str, Any]) -> List[str]:
-        group_ids = config.get("groups_list")
-        # Gitlab exposes different APIs to get a list of groups.
-        # We use https://docs.gitlab.com/ee/api/groups.html#list-groups in case there's no group IDs in the input config.
-        # This API provides full information about all available groups, including subgroups.
-        #
-        # In case there is a definitive list of groups IDs in the input config, the above API can not be used since
-        # it does not support filtering by group ID, so we use
-        # https://docs.gitlab.com/ee/api/groups.html#details-of-a-group and
-        # https://docs.gitlab.com/ee/api/groups.html#list-a-groups-descendant-groups for each group ID. The latter one does not
-        # provide full group info so can only be used to retrieve a list of group IDs and pass it further to init a corresponding stream.
-        auth_params = self._auth_params(config)
-        stream = GroupsList(**auth_params) if not group_ids else IncludeDescendantGroups(group_ids=group_ids, **auth_params)
-        for stream_slice in stream.stream_slices(sync_mode=SyncMode.full_refresh):
-            yield from stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
-
-    @staticmethod
-    def _is_http_allowed() -> bool:
-        return os.environ.get("DEPLOYMENT_MODE", "").upper() != "CLOUD"
-
-    def _try_refresh_access_token(self, logger, config: Mapping[str, Any]) -> Mapping[str, Any]:
-        """
-        This method attempts to refresh the expired `access_token`, while `refresh_token` is still valid.
-        In order to obtain the new `refresh_token`, the Customer should `re-auth` in the source settings.
-        """
-        # get current authenticator
-        authenticator: Union[SingleUseRefreshTokenOauth2Authenticator, TokenAuthenticator] = self.__auth_params.get("authenticator")
-        if isinstance(authenticator, SingleUseRefreshTokenOauth2Authenticator):
-            try:
-                creds = authenticator.refresh_access_token()
-                # update the actual config values
-                config["credentials"]["access_token"] = creds[0]
-                config["credentials"]["refresh_token"] = creds[3]
-                config["credentials"]["token_expiry_date"] = authenticator.get_new_token_expiry_date(creds[1], creds[2]).to_rfc3339_string()
-                # update the config
-                emit_configuration_as_airbyte_control_message(config)
-                logger.info("The `access_token` was successfully refreshed.")
-                return config
-            except (AirbyteTracedException, HTTPError) as http_error:
-                raise http_error
-            except Exception as e:
-                raise Exception(f"Unknown error occurred while refreshing the `access_token`, details: {e}")
-
-    def _handle_expired_access_token_error(self, logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
-        try:
-            return self.check_connection(logger, self._try_refresh_access_token(logger, config))
-        except HTTPError as http_error:
-            return False, f"Unable to refresh the `access_token`, please re-authenticate in Sources > Settings. Details: {http_error}"
-
     def check_connection(self, logger, config) -> Tuple[bool, Any]:
         config = self._ensure_default_values(config)
         is_valid, scheme, _ = parse_url(config["api_url"])
         if not is_valid:
             return False, "Invalid API resource locator."
-        if scheme == "http" and
+        if scheme == "http" and is_cloud_environment():
             return False, "Http scheme is not allowed in this environment. Please use `https` instead."
-        try:
-            projects = self._projects_stream(config)
-            for stream_slice in projects.stream_slices(sync_mode=SyncMode.full_refresh):
-                try:
-                    next(projects.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
-                    return True, None
-                except StopIteration:
-                    # in case groups/projects provided and 404 occurs
-                    return False, "Groups and/or projects that you provide are invalid or you don't have permission to view it."
-            return True, None  # in case there's no projects
-        except HTTPError as http_error:
-            if config["credentials"]["auth_type"] == "oauth2.0":
-                if http_error.response.status_code == 401:
-                    return self._handle_expired_access_token_error(logger, config)
-                elif http_error.response.status_code == 500:
-                    return False, f"Unable to connect to Gitlab API with the provided credentials - {repr(http_error)}"
-            else:
-                return False, f"Unable to connect to Gitlab API with the provided Private Access Token - {repr(http_error)}"
-        except Exception as error:
-            return False, f"Unknown error occurred while checking the connection - {repr(error)}"
-
-    def streams(self, config: MutableMapping[str, Any]) -> List[Stream]:
-        config = self._ensure_default_values(config)
-        auth_params = self._auth_params(config)
-        start_date = config.get("start_date")
-
-        groups, projects = self._groups_stream(config), self._projects_stream(config)
-        pipelines = Pipelines(parent_stream=projects, start_date=start_date, **auth_params)
-        merge_requests = MergeRequests(parent_stream=projects, start_date=start_date, **auth_params)
-        epics = Epics(parent_stream=groups, **auth_params)
-
-        streams = [
-            groups,
-            projects,
-            Branches(parent_stream=projects, repository_part=True, **auth_params),
-            Commits(parent_stream=projects, repository_part=True, start_date=start_date, **auth_params),
-            epics,
-            Deployments(parent_stream=projects, **auth_params),
-            EpicIssues(parent_stream=epics, **auth_params),
-            GroupIssueBoards(parent_stream=groups, **auth_params),
-            Issues(parent_stream=projects, start_date=start_date, **auth_params),
-            Jobs(parent_stream=pipelines, **auth_params),
-            ProjectMilestones(parent_stream=projects, **auth_params),
-            GroupMilestones(parent_stream=groups, **auth_params),
-            ProjectMembers(parent_stream=projects, **auth_params),
-            GroupMembers(parent_stream=groups, **auth_params),
-            ProjectLabels(parent_stream=projects, **auth_params),
-            GroupLabels(parent_stream=groups, **auth_params),
-            merge_requests,
-            MergeRequestCommits(parent_stream=merge_requests, **auth_params),
-            Releases(parent_stream=projects, **auth_params),
-            Tags(parent_stream=projects, repository_part=True, **auth_params),
-            pipelines,
-            PipelinesExtended(parent_stream=pipelines, **auth_params),
-            Users(parent_stream=projects, **auth_params),
-        ]
-
-        return streams
+        return super().check_connection(logger, config)
source_gitlab/streams.py
DELETED
@@ -1,431 +0,0 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-
-import datetime
-from abc import ABC
-from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple
-
-import pendulum
-import requests
-from airbyte_cdk.models import SyncMode
-from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
-from airbyte_cdk.sources.streams.core import StreamData
-from airbyte_cdk.sources.streams.http import HttpStream
-
-from .utils import parse_url
-
-
-class GitlabStream(HttpStream, ABC):
-    primary_key = "id"
-    raise_on_http_errors = True
-    stream_base_params = {}
-    flatten_id_keys = []
-    flatten_list_keys = []
-    per_page = 50
-    non_retriable_codes: List[int] = (403, 404)
-
-    def __init__(self, api_url: str, **kwargs):
-        super().__init__(**kwargs)
-        self.api_url = api_url
-        self.page = 1
-
-    def read_records(
-        self,
-        sync_mode: SyncMode,
-        cursor_field: List[str] = None,
-        stream_slice: Mapping[str, Any] = None,
-        stream_state: Mapping[str, Any] = None,
-    ) -> Iterable[StreamData]:
-        self.page = 1
-        yield from super().read_records(sync_mode, cursor_field, stream_slice, stream_state)
-
-    def request_params(
-        self,
-        stream_state: Mapping[str, Any],
-        stream_slice: Mapping[str, Any] = None,
-        next_page_token: Mapping[str, Any] = None,
-    ) -> MutableMapping[str, Any]:
-        params = {"per_page": self.per_page}
-        if next_page_token:
-            params.update(next_page_token)
-        params.update(self.stream_base_params)
-        return params
-
-    @property
-    def url_base(self) -> str:
-        _, scheme, host = parse_url(self.api_url)
-        return f"{scheme}://{host}/api/v4/"
-
-    @property
-    def availability_strategy(self) -> Optional["AvailabilityStrategy"]:
-        return None
-
-    def should_retry(self, response: requests.Response) -> bool:
-        # Gitlab API returns a 403 response in case a feature is disabled in a project (pipelines/jobs for instance).
-        if response.status_code in self.non_retriable_codes:
-            setattr(self, "raise_on_http_errors", False)
-            self.logger.warning(
-                f"Got {response.status_code} error when accessing URL {response.request.url}."
-                f" Very likely the feature is disabled for this project and/or group. Please double check it, or report a bug otherwise."
-            )
-            return False
-        return super().should_retry(response)
-
-    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
-        if response.status_code in self.non_retriable_codes:
-            return
-        response_data = response.json()
-        if isinstance(response_data, dict):
-            return None
-        if len(response_data) == self.per_page:
-            self.page += 1
-            return {"page": self.page}
-
-    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
-        if response.status_code in self.non_retriable_codes:
-            return []
-        response_data = response.json()
-        if isinstance(response_data, list):
-            for record in response_data:
-                yield self.transform(record, **kwargs)
-        elif isinstance(response_data, dict):
-            yield self.transform(response_data, **kwargs)
-        else:
-            self.logger.info(f"Unsupported type of response data for stream {self.name}")
-
-    def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
-        for key in self.flatten_id_keys:
-            self._flatten_id(record, key)
-
-        for key in self.flatten_list_keys:
-            self._flatten_list(record, key)
-
-        return record
-
-    def _flatten_id(self, record: Dict[str, Any], target: str):
-        target_value = record.get(target, None)
-        record[target + "_id"] = target_value.get("id") if target_value else None
-
-    def _flatten_list(self, record: Dict[str, Any], target: str):
-        record[target] = [target_data.get("id") for target_data in record.get(target, [])]
-
-
-class GitlabChildStream(GitlabStream):
-    path_list = ["id"]
-    flatten_parent_id = False
-
-    def __init__(self, parent_stream: GitlabStream, repository_part: bool = False, **kwargs):
-        super().__init__(**kwargs)
-        self.parent_stream = parent_stream
-        self.repo_url = repository_part
-
-    @property
-    def path_template(self) -> str:
-        template = [self.parent_stream.name] + ["{" + path_key + "}" for path_key in self.path_list]
-        if self.repo_url:
-            template.append("repository")
-        return "/".join(template + [self.name])
-
-    def stream_slices(
-        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
-    ) -> Iterable[Optional[Mapping[str, any]]]:
-        for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
-            for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
-                yield {path_key: record[path_key] for path_key in self.path_list}
-
-    def path(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> str:
-        return self.path_template.format(**{path_key: stream_slice[path_key] for path_key in self.path_list})
-
-    def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        if self.flatten_parent_id:
-            record[f"{self.parent_stream.name[:-1]}_id"] = stream_slice["id"]
-        return record
-
-
-class IncrementalGitlabChildStream(GitlabChildStream):
-    state_checkpoint_interval = 100
-    cursor_field = "updated_at"
-    lower_bound_filter = "updated_after"
-    upper_bound_filter = "updated_before"
-
-    def __init__(self, start_date, **kwargs):
-        super().__init__(**kwargs)
-        self._start_date = start_date
-
-    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
-        """
-        Return the latest state by comparing the cursor value in the latest record with the stream's most recent state object
-        and returning an updated state object.
-        """
-        project_id = latest_record.get("project_id")
-        latest_cursor_value = latest_record.get(self.cursor_field)
-        current_state = current_stream_state.get(str(project_id))
-        if current_state:
-            current_state = current_state.get(self.cursor_field)
-        current_state_value = current_state or latest_cursor_value
-        max_value = max(pendulum.parse(current_state_value), pendulum.parse(latest_cursor_value))
-        current_stream_state[str(project_id)] = {self.cursor_field: max_value.to_iso8601_string()}
-        return current_stream_state
-
-    @staticmethod
-    def _chunk_date_range(start_point: datetime.datetime) -> Iterable[Tuple[str, str]]:
-        end_point = datetime.datetime.now(datetime.timezone.utc)
-        if start_point > end_point:
-            return []
-        current_start, current_end = start_point, start_point
-        while current_end < end_point:
-            current_end = current_start + datetime.timedelta(days=180)
-            current_end = min(current_end, end_point)
-            yield str(current_start), str(current_end)
-            current_start = current_end + datetime.timedelta(seconds=1)
-
-    def stream_slices(
-        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
-    ) -> Iterable[Optional[Mapping[str, Any]]]:
-        stream_state = stream_state or {}
-        super_slices = super().stream_slices(sync_mode, cursor_field, stream_state)
-        for super_slice in super_slices:
-            state_project_value = stream_state.get(str(super_slice["id"]))
-            if self._start_date or state_project_value:
-                start_point = self._start_date
-                if state_project_value:
-                    state_value = state_project_value.get(self.cursor_field)
-                    if state_value and start_point:
-                        start_point = max(start_point, state_value)
-                    else:
-                        start_point = state_value or start_point
-                for start_dt, end_dt in self._chunk_date_range(pendulum.parse(start_point)):
-                    stream_slice = {key: value for key, value in super_slice.items()}
-                    stream_slice[self.lower_bound_filter] = start_dt
-                    stream_slice[self.upper_bound_filter] = end_dt
-                    yield stream_slice
-            else:
-                stream_slice = {key: value for key, value in super_slice.items()}
-                yield stream_slice
-
-    def request_params(self, stream_state=None, stream_slice: Mapping[str, Any] = None, **kwargs):
-        params = super().request_params(stream_state, stream_slice, **kwargs)
-        lower_bound_filter = stream_slice.get(self.lower_bound_filter)
-        upper_bound_filter = stream_slice.get(self.upper_bound_filter)
-        if lower_bound_filter and upper_bound_filter:
-            params[self.lower_bound_filter] = lower_bound_filter
-            params[self.upper_bound_filter] = upper_bound_filter
-        return params
-
-
-class Groups(GitlabStream):
-    use_cache = True
-
-    def __init__(self, group_ids: List, **kwargs):
-        super().__init__(**kwargs)
-        self.group_ids = group_ids
-
-    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
-        return f"groups/{stream_slice['id']}"
-
-    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
-        for gid in self.group_ids:
-            yield {"id": gid}
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        record["projects"] = [
-            {"id": project["id"], "path_with_namespace": project["path_with_namespace"]} for project in record.pop("projects", [])
-        ]
-        return record
-
-
-class IncludeDescendantGroups(Groups):
-    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
-        return stream_slice["path"]
-
-    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
-        for gid in self.group_ids:
-            yield {"path": f"groups/{gid}"}
-            yield {"path": f"groups/{gid}/descendant_groups"}
-
-
-class GroupsList(GitlabStream):
-    def path(self, **kwargs) -> str:
-        return "groups"
-
-
-class Projects(GitlabStream):
-    stream_base_params = {"statistics": 1}
-    use_cache = True
-
-    def __init__(self, project_ids: List = None, **kwargs):
-        super().__init__(**kwargs)
-        self.project_ids = project_ids
-
-    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
-        return f"projects/{stream_slice['id']}"
-
-    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
-        for pid in self.project_ids:
-            yield {"id": pid.replace("/", "%2F")}
-
-
-class GroupProjects(Projects):
-    name = "projects"
-
-    def __init__(self, parent_stream: GitlabStream = None, **kwargs):
-        super().__init__(**kwargs)
-        self.parent_stream = parent_stream
-
-    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
-        group_project_ids = set()
-        for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
-            for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
-                group_project_ids.update({i["path_with_namespace"] for i in record["projects"]})
-        for pid in group_project_ids:
-            if not self.project_ids or self.project_ids and pid in self.project_ids:
-                yield {"id": pid.replace("/", "%2F")}
-
-
-class GroupMilestones(GitlabChildStream):
-    path_template = "groups/{id}/milestones"
-
-
-class ProjectMilestones(GitlabChildStream):
-    path_template = "projects/{id}/milestones"
-
-
-class GroupMembers(GitlabChildStream):
-    path_template = "groups/{id}/members"
-    flatten_parent_id = True
-
-
-class ProjectMembers(GitlabChildStream):
-    path_template = "projects/{id}/members"
-    flatten_parent_id = True
-
-
-class GroupLabels(GitlabChildStream):
-    path_template = "groups/{id}/labels"
-    flatten_parent_id = True
-
-
-class ProjectLabels(GitlabChildStream):
-    path_template = "projects/{id}/labels"
-    flatten_parent_id = True
-
-
-class Branches(GitlabChildStream):
-    primary_key = "name"
-    flatten_id_keys = ["commit"]
-    flatten_parent_id = True
-
-
-class Commits(IncrementalGitlabChildStream):
-    cursor_field = "created_at"
-    lower_bound_filter = "since"
-    upper_bound_filter = "until"
-    flatten_parent_id = True
-    stream_base_params = {"with_stats": True}
-
-
-class Issues(IncrementalGitlabChildStream):
-    stream_base_params = {"scope": "all"}
-    flatten_id_keys = ["author", "assignee", "closed_by", "milestone"]
-    flatten_list_keys = ["assignees"]
-
-
-class MergeRequests(IncrementalGitlabChildStream):
-    stream_base_params = {"scope": "all"}
-    flatten_id_keys = ["author", "assignee", "closed_by", "milestone", "merged_by"]
-    flatten_list_keys = ["assignees"]
-
-
-class MergeRequestCommits(GitlabChildStream):
-    """Docs: https://docs.gitlab.com/ee/api/merge_requests.html#get-single-merge-request-commits"""
-
-    path_list = ["project_id", "iid"]
-    path_template = "projects/{project_id}/merge_requests/{iid}/commits"
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        record["project_id"] = stream_slice["project_id"]
-        record["merge_request_iid"] = stream_slice["iid"]
-
-        return record
-
-
-class Releases(GitlabChildStream):
-    primary_key = "name"
-    flatten_id_keys = ["author", "commit"]
-    flatten_list_keys = ["milestones"]
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        record["project_id"] = stream_slice["id"]
-
-        return record
-
-
-class Tags(GitlabChildStream):
-    primary_key = "name"
-    flatten_id_keys = ["commit"]
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        record["project_id"] = stream_slice["id"]
-
-        return record
-
-
-class Pipelines(IncrementalGitlabChildStream):
-    pass
-
-
-class PipelinesExtended(GitlabChildStream):
-    path_list = ["project_id", "id"]
-    path_template = "projects/{project_id}/pipelines/{id}"
-
-
-class Jobs(GitlabChildStream):
-    flatten_id_keys = ["user", "pipeline", "runner", "commit"]
-    path_list = ["project_id", "id"]
-    path_template = "projects/{project_id}/pipelines/{id}/jobs"
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        record["project_id"] = stream_slice["project_id"]
-        return record
-
-
-class GroupIssueBoards(GitlabChildStream):
-    path_template = "groups/{id}/boards"
-    flatten_parent_id = True
-
-
-class Users(GitlabChildStream):
-    pass
-
-
-class Epics(GitlabChildStream):
-    primary_key = "iid"
-    flatten_id_keys = ["author"]
-
-
-class EpicIssues(GitlabChildStream):
-    primary_key = "epic_issue_id"
-    path_list = ["group_id", "iid"]
-    flatten_id_keys = ["milestone", "assignee", "author"]
-    flatten_list_keys = ["assignees"]
-    path_template = "groups/{group_id}/epics/{iid}/issues"
-
-
-class Deployments(GitlabChildStream):
-    primary_key = "id"
-    flatten_id_keys = ["user", "environment"]
-    path_template = "projects/{id}/deployments"
-
-    def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
-        super().transform(record, stream_slice, **kwargs)
-        record["user_username"] = record["user"]["username"]
-        record["user_full_name"] = record["user"]["name"]
-        record["environment_name"] = record["environment"]["name"]
-        record["project_id"] = stream_slice["id"]
-        return record
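Everything the deleted streams.py did by hand — page-number pagination, record flattening, and incremental date slicing — now has to be expressed in manifest.yaml and the new components/partition_routers.py. One removed helper worth noting is IncrementalGitlabChildStream._chunk_date_range, which walked from the start date to now in 180-day windows separated by one second. A standalone sketch of that behaviour, lifted from the deleted code for illustration only:

# Standalone illustration of the removed _chunk_date_range logic: yield
# (start, end) string pairs covering [start_point, now] in 180-day windows.
import datetime
from typing import Iterable, Tuple


def chunk_date_range(start_point: datetime.datetime) -> Iterable[Tuple[str, str]]:
    end_point = datetime.datetime.now(datetime.timezone.utc)
    if start_point > end_point:
        return
    current_start = start_point
    current_end = start_point
    while current_end < end_point:
        # Each window spans at most 180 days; the next one starts 1 second later.
        current_end = min(current_start + datetime.timedelta(days=180), end_point)
        yield str(current_start), str(current_end)
        current_start = current_end + datetime.timedelta(seconds=1)


# Example: print the windows used for a sync starting on 2023-01-01.
for lower, upper in chunk_date_range(datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc)):
    print(lower, "->", upper)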
{airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/WHEEL
File without changes

{airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/entry_points.txt
File without changes