airbyte-source-gitlab 3.0.0.dev202403072311__py3-none-any.whl → 4.0.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/METADATA +3 -3
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/RECORD +7 -6
- source_gitlab/components/partition_routers.py +43 -0
- source_gitlab/manifest.yaml +613 -0
- source_gitlab/source.py +9 -224
- source_gitlab/streams.py +0 -431
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/WHEEL +0 -0
- {airbyte_source_gitlab-3.0.0.dev202403072311.dist-info → airbyte_source_gitlab-4.0.1.dist-info}/entry_points.txt +0 -0
source_gitlab/source.py
CHANGED
@@ -1,245 +1,30 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
|
6
|
-
import os
|
7
|
-
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
|
6
|
+
from typing import Any, MutableMapping, Tuple
|
8
7
|
|
9
|
-
import pendulum
|
10
|
-
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
|
11
|
-
from airbyte_cdk.models import SyncMode
|
12
|
-
from airbyte_cdk.sources import AbstractSource
|
13
|
-
from airbyte_cdk.sources.streams import Stream
|
14
|
-
from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import SingleUseRefreshTokenOauth2Authenticator
|
15
|
-
from airbyte_cdk.sources.streams.http.requests_native_auth.token import TokenAuthenticator
|
16
|
-
from airbyte_cdk.utils import AirbyteTracedException
|
17
|
-
from requests.auth import AuthBase
|
18
|
-
from requests.exceptions import HTTPError
|
8
|
+
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
9
|
+
from airbyte_cdk.utils import is_cloud_environment
|
19
10
|
|
20
|
-
from .streams import (
|
21
|
-
Branches,
|
22
|
-
Commits,
|
23
|
-
Deployments,
|
24
|
-
EpicIssues,
|
25
|
-
Epics,
|
26
|
-
GitlabStream,
|
27
|
-
GroupIssueBoards,
|
28
|
-
GroupLabels,
|
29
|
-
GroupMembers,
|
30
|
-
GroupMilestones,
|
31
|
-
GroupProjects,
|
32
|
-
Groups,
|
33
|
-
GroupsList,
|
34
|
-
IncludeDescendantGroups,
|
35
|
-
Issues,
|
36
|
-
Jobs,
|
37
|
-
MergeRequestCommits,
|
38
|
-
MergeRequests,
|
39
|
-
Pipelines,
|
40
|
-
PipelinesExtended,
|
41
|
-
ProjectLabels,
|
42
|
-
ProjectMembers,
|
43
|
-
ProjectMilestones,
|
44
|
-
Projects,
|
45
|
-
Releases,
|
46
|
-
Tags,
|
47
|
-
Users,
|
48
|
-
)
|
49
11
|
from .utils import parse_url
|
50
12
|
|
51
13
|
|
52
|
-
class SingleUseRefreshTokenGitlabOAuth2Authenticator(SingleUseRefreshTokenOauth2Authenticator):
|
53
|
-
def __init__(self, *args, created_at_name: str = "created_at", **kwargs):
|
54
|
-
super().__init__(*args, **kwargs)
|
55
|
-
self._created_at_name = created_at_name
|
56
|
-
|
57
|
-
def get_created_at_name(self) -> str:
|
58
|
-
return self._created_at_name
|
59
|
-
|
60
|
-
def get_access_token(self) -> str:
|
61
|
-
if self.token_has_expired():
|
62
|
-
new_access_token, access_token_expires_in, access_token_created_at, new_refresh_token = self.refresh_access_token()
|
63
|
-
new_token_expiry_date = self.get_new_token_expiry_date(access_token_expires_in, access_token_created_at)
|
64
|
-
self.access_token = new_access_token
|
65
|
-
self.set_refresh_token(new_refresh_token)
|
66
|
-
self.set_token_expiry_date(new_token_expiry_date)
|
67
|
-
emit_configuration_as_airbyte_control_message(self._connector_config)
|
68
|
-
return self.access_token
|
69
|
-
|
70
|
-
@staticmethod
|
71
|
-
def get_new_token_expiry_date(access_token_expires_in: int, access_token_created_at: int) -> pendulum.DateTime:
|
72
|
-
return pendulum.from_timestamp(access_token_created_at + access_token_expires_in)
|
73
|
-
|
74
|
-
def refresh_access_token(self) -> Tuple[str, int, int, str]:
|
75
|
-
response_json = self._get_refresh_access_token_response()
|
76
|
-
return (
|
77
|
-
response_json[self.get_access_token_name()],
|
78
|
-
response_json[self.get_expires_in_name()],
|
79
|
-
response_json[self.get_created_at_name()],
|
80
|
-
response_json[self.get_refresh_token_name()],
|
81
|
-
)
|
82
|
-
|
83
|
-
|
84
|
-
def get_authenticator(config: MutableMapping) -> AuthBase:
|
85
|
-
if config["credentials"]["auth_type"] == "access_token":
|
86
|
-
return TokenAuthenticator(token=config["credentials"]["access_token"])
|
87
|
-
return SingleUseRefreshTokenGitlabOAuth2Authenticator(
|
88
|
-
config,
|
89
|
-
token_refresh_endpoint=f"https://{config['api_url']}/oauth/token",
|
90
|
-
refresh_token_error_status_codes=(400,),
|
91
|
-
refresh_token_error_key="error",
|
92
|
-
refresh_token_error_values="invalid_grant",
|
93
|
-
)
|
94
|
-
|
95
|
-
|
96
|
-
class SourceGitlab(AbstractSource):
|
97
|
-
def __init__(self, *args, **kwargs):
|
98
|
-
super().__init__(*args, **kwargs)
|
99
|
-
self.__auth_params: Mapping[str, Any] = {}
|
100
|
-
self.__groups_stream: Optional[GitlabStream] = None
|
101
|
-
self.__projects_stream: Optional[GitlabStream] = None
|
14
|
+
class SourceGitlab(YamlDeclarativeSource):
|
15
|
+
def __init__(self):
|
16
|
+
super().__init__(**{"path_to_yaml": "manifest.yaml"})
|
102
17
|
|
103
18
|
@staticmethod
|
104
19
|
def _ensure_default_values(config: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
|
105
20
|
config["api_url"] = config.get("api_url") or "gitlab.com"
|
106
21
|
return config
|
107
22
|
|
108
|
-
def _groups_stream(self, config: MutableMapping[str, Any]) -> Groups:
|
109
|
-
if not self.__groups_stream:
|
110
|
-
auth_params = self._auth_params(config)
|
111
|
-
group_ids = list(map(lambda x: x["id"], self._get_group_list(config)))
|
112
|
-
self.__groups_stream = Groups(group_ids=group_ids, **auth_params)
|
113
|
-
return self.__groups_stream
|
114
|
-
|
115
|
-
def _projects_stream(self, config: MutableMapping[str, Any]) -> Union[Projects, GroupProjects]:
|
116
|
-
if not self.__projects_stream:
|
117
|
-
auth_params = self._auth_params(config)
|
118
|
-
project_ids = config.get("projects_list", [])
|
119
|
-
groups_stream = self._groups_stream(config)
|
120
|
-
if groups_stream.group_ids:
|
121
|
-
self.__projects_stream = GroupProjects(project_ids=project_ids, parent_stream=groups_stream, **auth_params)
|
122
|
-
return self.__projects_stream
|
123
|
-
self.__projects_stream = Projects(project_ids=project_ids, **auth_params)
|
124
|
-
return self.__projects_stream
|
125
|
-
|
126
|
-
def _auth_params(self, config: MutableMapping[str, Any]) -> Mapping[str, Any]:
|
127
|
-
if not self.__auth_params:
|
128
|
-
auth = get_authenticator(config)
|
129
|
-
self.__auth_params = dict(authenticator=auth, api_url=config["api_url"])
|
130
|
-
return self.__auth_params
|
131
|
-
|
132
|
-
def _get_group_list(self, config: MutableMapping[str, Any]) -> List[str]:
|
133
|
-
group_ids = config.get("groups_list")
|
134
|
-
# Gitlab exposes different APIs to get a list of groups.
|
135
|
-
# We use https://docs.gitlab.com/ee/api/groups.html#list-groups in case there's no group IDs in the input config.
|
136
|
-
# This API provides full information about all available groups, including subgroups.
|
137
|
-
#
|
138
|
-
# In case there is a definitive list of groups IDs in the input config, the above API can not be used since
|
139
|
-
# it does not support filtering by group ID, so we use
|
140
|
-
# https://docs.gitlab.com/ee/api/groups.html#details-of-a-group and
|
141
|
-
# https: //docs.gitlab.com/ee/api/groups.html#list-a-groups-descendant-groups for each group ID. The latter one does not
|
142
|
-
# provide full group info so can only be used to retrieve alist of group IDs and pass it further to init a corresponding stream.
|
143
|
-
auth_params = self._auth_params(config)
|
144
|
-
stream = GroupsList(**auth_params) if not group_ids else IncludeDescendantGroups(group_ids=group_ids, **auth_params)
|
145
|
-
for stream_slice in stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
146
|
-
yield from stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
|
147
|
-
|
148
|
-
@staticmethod
|
149
|
-
def _is_http_allowed() -> bool:
|
150
|
-
return os.environ.get("DEPLOYMENT_MODE", "").upper() != "CLOUD"
|
151
|
-
|
152
|
-
def _try_refresh_access_token(self, logger, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
153
|
-
"""
|
154
|
-
This method attempts to refresh the expired `access_token`, while `refresh_token` is still valid.
|
155
|
-
In order to obtain the new `refresh_token`, the Customer should `re-auth` in the source settings.
|
156
|
-
"""
|
157
|
-
# get current authenticator
|
158
|
-
authenticator: Union[SingleUseRefreshTokenOauth2Authenticator, TokenAuthenticator] = self.__auth_params.get("authenticator")
|
159
|
-
if isinstance(authenticator, SingleUseRefreshTokenOauth2Authenticator):
|
160
|
-
try:
|
161
|
-
creds = authenticator.refresh_access_token()
|
162
|
-
# update the actual config values
|
163
|
-
config["credentials"]["access_token"] = creds[0]
|
164
|
-
config["credentials"]["refresh_token"] = creds[3]
|
165
|
-
config["credentials"]["token_expiry_date"] = authenticator.get_new_token_expiry_date(creds[1], creds[2]).to_rfc3339_string()
|
166
|
-
# update the config
|
167
|
-
emit_configuration_as_airbyte_control_message(config)
|
168
|
-
logger.info("The `access_token` was successfully refreshed.")
|
169
|
-
return config
|
170
|
-
except (AirbyteTracedException, HTTPError) as http_error:
|
171
|
-
raise http_error
|
172
|
-
except Exception as e:
|
173
|
-
raise Exception(f"Unknown error occurred while refreshing the `access_token`, details: {e}")
|
174
|
-
|
175
|
-
def _handle_expired_access_token_error(self, logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
|
176
|
-
try:
|
177
|
-
return self.check_connection(logger, self._try_refresh_access_token(logger, config))
|
178
|
-
except HTTPError as http_error:
|
179
|
-
return False, f"Unable to refresh the `access_token`, please re-authenticate in Sources > Settings. Details: {http_error}"
|
180
|
-
|
181
23
|
def check_connection(self, logger, config) -> Tuple[bool, Any]:
|
182
24
|
config = self._ensure_default_values(config)
|
183
25
|
is_valid, scheme, _ = parse_url(config["api_url"])
|
184
26
|
if not is_valid:
|
185
27
|
return False, "Invalid API resource locator."
|
186
|
-
if scheme == "http" and not self._is_http_allowed():
|
28
|
+
if scheme == "http" and is_cloud_environment():
|
187
29
|
return False, "Http scheme is not allowed in this environment. Please use `https` instead."
|
188
|
-
|
189
|
-
projects = self._projects_stream(config)
|
190
|
-
for stream_slice in projects.stream_slices(sync_mode=SyncMode.full_refresh):
|
191
|
-
try:
|
192
|
-
next(projects.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
|
193
|
-
return True, None
|
194
|
-
except StopIteration:
|
195
|
-
# in case groups/projects provided and 404 occurs
|
196
|
-
return False, "Groups and/or projects that you provide are invalid or you don't have permission to view it."
|
197
|
-
return True, None # in case there's no projects
|
198
|
-
except HTTPError as http_error:
|
199
|
-
if config["credentials"]["auth_type"] == "oauth2.0":
|
200
|
-
if http_error.response.status_code == 401:
|
201
|
-
return self._handle_expired_access_token_error(logger, config)
|
202
|
-
elif http_error.response.status_code == 500:
|
203
|
-
return False, f"Unable to connect to Gitlab API with the provided credentials - {repr(http_error)}"
|
204
|
-
else:
|
205
|
-
return False, f"Unable to connect to Gitlab API with the provided Private Access Token - {repr(http_error)}"
|
206
|
-
except Exception as error:
|
207
|
-
return False, f"Unknown error occurred while checking the connection - {repr(error)}"
|
208
|
-
|
209
|
-
def streams(self, config: MutableMapping[str, Any]) -> List[Stream]:
|
210
|
-
config = self._ensure_default_values(config)
|
211
|
-
auth_params = self._auth_params(config)
|
212
|
-
start_date = config.get("start_date")
|
213
|
-
|
214
|
-
groups, projects = self._groups_stream(config), self._projects_stream(config)
|
215
|
-
pipelines = Pipelines(parent_stream=projects, start_date=start_date, **auth_params)
|
216
|
-
merge_requests = MergeRequests(parent_stream=projects, start_date=start_date, **auth_params)
|
217
|
-
epics = Epics(parent_stream=groups, **auth_params)
|
218
|
-
|
219
|
-
streams = [
|
220
|
-
groups,
|
221
|
-
projects,
|
222
|
-
Branches(parent_stream=projects, repository_part=True, **auth_params),
|
223
|
-
Commits(parent_stream=projects, repository_part=True, start_date=start_date, **auth_params),
|
224
|
-
epics,
|
225
|
-
Deployments(parent_stream=projects, **auth_params),
|
226
|
-
EpicIssues(parent_stream=epics, **auth_params),
|
227
|
-
GroupIssueBoards(parent_stream=groups, **auth_params),
|
228
|
-
Issues(parent_stream=projects, start_date=start_date, **auth_params),
|
229
|
-
Jobs(parent_stream=pipelines, **auth_params),
|
230
|
-
ProjectMilestones(parent_stream=projects, **auth_params),
|
231
|
-
GroupMilestones(parent_stream=groups, **auth_params),
|
232
|
-
ProjectMembers(parent_stream=projects, **auth_params),
|
233
|
-
GroupMembers(parent_stream=groups, **auth_params),
|
234
|
-
ProjectLabels(parent_stream=projects, **auth_params),
|
235
|
-
GroupLabels(parent_stream=groups, **auth_params),
|
236
|
-
merge_requests,
|
237
|
-
MergeRequestCommits(parent_stream=merge_requests, **auth_params),
|
238
|
-
Releases(parent_stream=projects, **auth_params),
|
239
|
-
Tags(parent_stream=projects, repository_part=True, **auth_params),
|
240
|
-
pipelines,
|
241
|
-
PipelinesExtended(parent_stream=pipelines, **auth_params),
|
242
|
-
Users(parent_stream=projects, **auth_params),
|
243
|
-
]
|
244
|
-
|
245
|
-
return streams
|
30
|
+
return super().check_connection(logger, config)
|
source_gitlab/streams.py
DELETED
@@ -1,431 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import datetime
|
6
|
-
from abc import ABC
|
7
|
-
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple
|
8
|
-
|
9
|
-
import pendulum
|
10
|
-
import requests
|
11
|
-
from airbyte_cdk.models import SyncMode
|
12
|
-
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
|
13
|
-
from airbyte_cdk.sources.streams.core import StreamData
|
14
|
-
from airbyte_cdk.sources.streams.http import HttpStream
|
15
|
-
|
16
|
-
from .utils import parse_url
|
17
|
-
|
18
|
-
|
19
|
-
class GitlabStream(HttpStream, ABC):
|
20
|
-
primary_key = "id"
|
21
|
-
raise_on_http_errors = True
|
22
|
-
stream_base_params = {}
|
23
|
-
flatten_id_keys = []
|
24
|
-
flatten_list_keys = []
|
25
|
-
per_page = 50
|
26
|
-
non_retriable_codes: List[int] = (403, 404)
|
27
|
-
|
28
|
-
def __init__(self, api_url: str, **kwargs):
|
29
|
-
super().__init__(**kwargs)
|
30
|
-
self.api_url = api_url
|
31
|
-
self.page = 1
|
32
|
-
|
33
|
-
def read_records(
|
34
|
-
self,
|
35
|
-
sync_mode: SyncMode,
|
36
|
-
cursor_field: List[str] = None,
|
37
|
-
stream_slice: Mapping[str, Any] = None,
|
38
|
-
stream_state: Mapping[str, Any] = None,
|
39
|
-
) -> Iterable[StreamData]:
|
40
|
-
self.page = 1
|
41
|
-
yield from super().read_records(sync_mode, cursor_field, stream_slice, stream_state)
|
42
|
-
|
43
|
-
def request_params(
|
44
|
-
self,
|
45
|
-
stream_state: Mapping[str, Any],
|
46
|
-
stream_slice: Mapping[str, Any] = None,
|
47
|
-
next_page_token: Mapping[str, Any] = None,
|
48
|
-
) -> MutableMapping[str, Any]:
|
49
|
-
params = {"per_page": self.per_page}
|
50
|
-
if next_page_token:
|
51
|
-
params.update(next_page_token)
|
52
|
-
params.update(self.stream_base_params)
|
53
|
-
return params
|
54
|
-
|
55
|
-
@property
|
56
|
-
def url_base(self) -> str:
|
57
|
-
_, scheme, host = parse_url(self.api_url)
|
58
|
-
return f"{scheme}://{host}/api/v4/"
|
59
|
-
|
60
|
-
@property
|
61
|
-
def availability_strategy(self) -> Optional["AvailabilityStrategy"]:
|
62
|
-
return None
|
63
|
-
|
64
|
-
def should_retry(self, response: requests.Response) -> bool:
|
65
|
-
# Gitlab API returns a 403 response in case a feature is disabled in a project (pipelines/jobs for instance).
|
66
|
-
if response.status_code in self.non_retriable_codes:
|
67
|
-
setattr(self, "raise_on_http_errors", False)
|
68
|
-
self.logger.warning(
|
69
|
-
f"Got {response.status_code} error when accessing URL {response.request.url}."
|
70
|
-
f" Very likely the feature is disabled for this project and/or group. Please double check it, or report a bug otherwise."
|
71
|
-
)
|
72
|
-
return False
|
73
|
-
return super().should_retry(response)
|
74
|
-
|
75
|
-
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
|
76
|
-
if response.status_code in self.non_retriable_codes:
|
77
|
-
return
|
78
|
-
response_data = response.json()
|
79
|
-
if isinstance(response_data, dict):
|
80
|
-
return None
|
81
|
-
if len(response_data) == self.per_page:
|
82
|
-
self.page += 1
|
83
|
-
return {"page": self.page}
|
84
|
-
|
85
|
-
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
|
86
|
-
if response.status_code in self.non_retriable_codes:
|
87
|
-
return []
|
88
|
-
response_data = response.json()
|
89
|
-
if isinstance(response_data, list):
|
90
|
-
for record in response_data:
|
91
|
-
yield self.transform(record, **kwargs)
|
92
|
-
elif isinstance(response_data, dict):
|
93
|
-
yield self.transform(response_data, **kwargs)
|
94
|
-
else:
|
95
|
-
self.logger.info(f"Unsupported type of response data for stream {self.name}")
|
96
|
-
|
97
|
-
def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
|
98
|
-
for key in self.flatten_id_keys:
|
99
|
-
self._flatten_id(record, key)
|
100
|
-
|
101
|
-
for key in self.flatten_list_keys:
|
102
|
-
self._flatten_list(record, key)
|
103
|
-
|
104
|
-
return record
|
105
|
-
|
106
|
-
def _flatten_id(self, record: Dict[str, Any], target: str):
|
107
|
-
target_value = record.get(target, None)
|
108
|
-
record[target + "_id"] = target_value.get("id") if target_value else None
|
109
|
-
|
110
|
-
def _flatten_list(self, record: Dict[str, Any], target: str):
|
111
|
-
record[target] = [target_data.get("id") for target_data in record.get(target, [])]
|
112
|
-
|
113
|
-
|
114
|
-
class GitlabChildStream(GitlabStream):
|
115
|
-
path_list = ["id"]
|
116
|
-
flatten_parent_id = False
|
117
|
-
|
118
|
-
def __init__(self, parent_stream: GitlabStream, repository_part: bool = False, **kwargs):
|
119
|
-
super().__init__(**kwargs)
|
120
|
-
self.parent_stream = parent_stream
|
121
|
-
self.repo_url = repository_part
|
122
|
-
|
123
|
-
@property
|
124
|
-
def path_template(self) -> str:
|
125
|
-
template = [self.parent_stream.name] + ["{" + path_key + "}" for path_key in self.path_list]
|
126
|
-
if self.repo_url:
|
127
|
-
template.append("repository")
|
128
|
-
return "/".join(template + [self.name])
|
129
|
-
|
130
|
-
def stream_slices(
|
131
|
-
self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
|
132
|
-
) -> Iterable[Optional[Mapping[str, any]]]:
|
133
|
-
for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
134
|
-
for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
|
135
|
-
yield {path_key: record[path_key] for path_key in self.path_list}
|
136
|
-
|
137
|
-
def path(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> str:
|
138
|
-
return self.path_template.format(**{path_key: stream_slice[path_key] for path_key in self.path_list})
|
139
|
-
|
140
|
-
def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
|
141
|
-
super().transform(record, stream_slice, **kwargs)
|
142
|
-
if self.flatten_parent_id:
|
143
|
-
record[f"{self.parent_stream.name[:-1]}_id"] = stream_slice["id"]
|
144
|
-
return record
|
145
|
-
|
146
|
-
|
147
|
-
class IncrementalGitlabChildStream(GitlabChildStream):
|
148
|
-
state_checkpoint_interval = 100
|
149
|
-
cursor_field = "updated_at"
|
150
|
-
lower_bound_filter = "updated_after"
|
151
|
-
upper_bound_filter = "updated_before"
|
152
|
-
|
153
|
-
def __init__(self, start_date, **kwargs):
|
154
|
-
super().__init__(**kwargs)
|
155
|
-
self._start_date = start_date
|
156
|
-
|
157
|
-
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
|
158
|
-
"""
|
159
|
-
Return the latest state by comparing the cursor value in the latest record with the stream's most recent state object
|
160
|
-
and returning an updated state object.
|
161
|
-
"""
|
162
|
-
project_id = latest_record.get("project_id")
|
163
|
-
latest_cursor_value = latest_record.get(self.cursor_field)
|
164
|
-
current_state = current_stream_state.get(str(project_id))
|
165
|
-
if current_state:
|
166
|
-
current_state = current_state.get(self.cursor_field)
|
167
|
-
current_state_value = current_state or latest_cursor_value
|
168
|
-
max_value = max(pendulum.parse(current_state_value), pendulum.parse(latest_cursor_value))
|
169
|
-
current_stream_state[str(project_id)] = {self.cursor_field: max_value.to_iso8601_string()}
|
170
|
-
return current_stream_state
|
171
|
-
|
172
|
-
@staticmethod
|
173
|
-
def _chunk_date_range(start_point: datetime.datetime) -> Iterable[Tuple[str, str]]:
|
174
|
-
end_point = datetime.datetime.now(datetime.timezone.utc)
|
175
|
-
if start_point > end_point:
|
176
|
-
return []
|
177
|
-
current_start, current_end = start_point, start_point
|
178
|
-
while current_end < end_point:
|
179
|
-
current_end = current_start + datetime.timedelta(days=180)
|
180
|
-
current_end = min(current_end, end_point)
|
181
|
-
yield str(current_start), str(current_end)
|
182
|
-
current_start = current_end + datetime.timedelta(seconds=1)
|
183
|
-
|
184
|
-
def stream_slices(
|
185
|
-
self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
|
186
|
-
) -> Iterable[Optional[Mapping[str, Any]]]:
|
187
|
-
stream_state = stream_state or {}
|
188
|
-
super_slices = super().stream_slices(sync_mode, cursor_field, stream_state)
|
189
|
-
for super_slice in super_slices:
|
190
|
-
state_project_value = stream_state.get(str(super_slice["id"]))
|
191
|
-
if self._start_date or state_project_value:
|
192
|
-
start_point = self._start_date
|
193
|
-
if state_project_value:
|
194
|
-
state_value = state_project_value.get(self.cursor_field)
|
195
|
-
if state_value and start_point:
|
196
|
-
start_point = max(start_point, state_value)
|
197
|
-
else:
|
198
|
-
start_point = state_value or start_point
|
199
|
-
for start_dt, end_dt in self._chunk_date_range(pendulum.parse(start_point)):
|
200
|
-
stream_slice = {key: value for key, value in super_slice.items()}
|
201
|
-
stream_slice[self.lower_bound_filter] = start_dt
|
202
|
-
stream_slice[self.upper_bound_filter] = end_dt
|
203
|
-
yield stream_slice
|
204
|
-
else:
|
205
|
-
stream_slice = {key: value for key, value in super_slice.items()}
|
206
|
-
yield stream_slice
|
207
|
-
|
208
|
-
def request_params(self, stream_state=None, stream_slice: Mapping[str, Any] = None, **kwargs):
|
209
|
-
params = super().request_params(stream_state, stream_slice, **kwargs)
|
210
|
-
lower_bound_filter = stream_slice.get(self.lower_bound_filter)
|
211
|
-
upper_bound_filter = stream_slice.get(self.upper_bound_filter)
|
212
|
-
if lower_bound_filter and upper_bound_filter:
|
213
|
-
params[self.lower_bound_filter] = lower_bound_filter
|
214
|
-
params[self.upper_bound_filter] = upper_bound_filter
|
215
|
-
return params
|
216
|
-
|
217
|
-
|
218
|
-
class Groups(GitlabStream):
|
219
|
-
use_cache = True
|
220
|
-
|
221
|
-
def __init__(self, group_ids: List, **kwargs):
|
222
|
-
super().__init__(**kwargs)
|
223
|
-
self.group_ids = group_ids
|
224
|
-
|
225
|
-
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
226
|
-
return f"groups/{stream_slice['id']}"
|
227
|
-
|
228
|
-
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
|
229
|
-
for gid in self.group_ids:
|
230
|
-
yield {"id": gid}
|
231
|
-
|
232
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
233
|
-
record["projects"] = [
|
234
|
-
{"id": project["id"], "path_with_namespace": project["path_with_namespace"]} for project in record.pop("projects", [])
|
235
|
-
]
|
236
|
-
return record
|
237
|
-
|
238
|
-
|
239
|
-
class IncludeDescendantGroups(Groups):
|
240
|
-
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
241
|
-
return stream_slice["path"]
|
242
|
-
|
243
|
-
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
|
244
|
-
for gid in self.group_ids:
|
245
|
-
yield {"path": f"groups/{gid}"}
|
246
|
-
yield {"path": f"groups/{gid}/descendant_groups"}
|
247
|
-
|
248
|
-
|
249
|
-
class GroupsList(GitlabStream):
|
250
|
-
def path(self, **kwargs) -> str:
|
251
|
-
return "groups"
|
252
|
-
|
253
|
-
|
254
|
-
class Projects(GitlabStream):
|
255
|
-
stream_base_params = {"statistics": 1}
|
256
|
-
use_cache = True
|
257
|
-
|
258
|
-
def __init__(self, project_ids: List = None, **kwargs):
|
259
|
-
super().__init__(**kwargs)
|
260
|
-
self.project_ids = project_ids
|
261
|
-
|
262
|
-
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
263
|
-
return f"projects/{stream_slice['id']}"
|
264
|
-
|
265
|
-
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
|
266
|
-
for pid in self.project_ids:
|
267
|
-
yield {"id": pid.replace("/", "%2F")}
|
268
|
-
|
269
|
-
|
270
|
-
class GroupProjects(Projects):
|
271
|
-
name = "projects"
|
272
|
-
|
273
|
-
def __init__(self, parent_stream: GitlabStream = None, **kwargs):
|
274
|
-
super().__init__(**kwargs)
|
275
|
-
self.parent_stream = parent_stream
|
276
|
-
|
277
|
-
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
|
278
|
-
group_project_ids = set()
|
279
|
-
for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
280
|
-
for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
|
281
|
-
group_project_ids.update({i["path_with_namespace"] for i in record["projects"]})
|
282
|
-
for pid in group_project_ids:
|
283
|
-
if not self.project_ids or self.project_ids and pid in self.project_ids:
|
284
|
-
yield {"id": pid.replace("/", "%2F")}
|
285
|
-
|
286
|
-
|
287
|
-
class GroupMilestones(GitlabChildStream):
|
288
|
-
path_template = "groups/{id}/milestones"
|
289
|
-
|
290
|
-
|
291
|
-
class ProjectMilestones(GitlabChildStream):
|
292
|
-
path_template = "projects/{id}/milestones"
|
293
|
-
|
294
|
-
|
295
|
-
class GroupMembers(GitlabChildStream):
|
296
|
-
path_template = "groups/{id}/members"
|
297
|
-
flatten_parent_id = True
|
298
|
-
|
299
|
-
|
300
|
-
class ProjectMembers(GitlabChildStream):
|
301
|
-
path_template = "projects/{id}/members"
|
302
|
-
flatten_parent_id = True
|
303
|
-
|
304
|
-
|
305
|
-
class GroupLabels(GitlabChildStream):
|
306
|
-
path_template = "groups/{id}/labels"
|
307
|
-
flatten_parent_id = True
|
308
|
-
|
309
|
-
|
310
|
-
class ProjectLabels(GitlabChildStream):
|
311
|
-
path_template = "projects/{id}/labels"
|
312
|
-
flatten_parent_id = True
|
313
|
-
|
314
|
-
|
315
|
-
class Branches(GitlabChildStream):
|
316
|
-
primary_key = "name"
|
317
|
-
flatten_id_keys = ["commit"]
|
318
|
-
flatten_parent_id = True
|
319
|
-
|
320
|
-
|
321
|
-
class Commits(IncrementalGitlabChildStream):
|
322
|
-
cursor_field = "created_at"
|
323
|
-
lower_bound_filter = "since"
|
324
|
-
upper_bound_filter = "until"
|
325
|
-
flatten_parent_id = True
|
326
|
-
stream_base_params = {"with_stats": True}
|
327
|
-
|
328
|
-
|
329
|
-
class Issues(IncrementalGitlabChildStream):
|
330
|
-
stream_base_params = {"scope": "all"}
|
331
|
-
flatten_id_keys = ["author", "assignee", "closed_by", "milestone"]
|
332
|
-
flatten_list_keys = ["assignees"]
|
333
|
-
|
334
|
-
|
335
|
-
class MergeRequests(IncrementalGitlabChildStream):
|
336
|
-
stream_base_params = {"scope": "all"}
|
337
|
-
flatten_id_keys = ["author", "assignee", "closed_by", "milestone", "merged_by"]
|
338
|
-
flatten_list_keys = ["assignees"]
|
339
|
-
|
340
|
-
|
341
|
-
class MergeRequestCommits(GitlabChildStream):
|
342
|
-
"""Docs: https://docs.gitlab.com/ee/api/merge_requests.html#get-single-merge-request-commits"""
|
343
|
-
|
344
|
-
path_list = ["project_id", "iid"]
|
345
|
-
path_template = "projects/{project_id}/merge_requests/{iid}/commits"
|
346
|
-
|
347
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
348
|
-
super().transform(record, stream_slice, **kwargs)
|
349
|
-
record["project_id"] = stream_slice["project_id"]
|
350
|
-
record["merge_request_iid"] = stream_slice["iid"]
|
351
|
-
|
352
|
-
return record
|
353
|
-
|
354
|
-
|
355
|
-
class Releases(GitlabChildStream):
|
356
|
-
primary_key = "name"
|
357
|
-
flatten_id_keys = ["author", "commit"]
|
358
|
-
flatten_list_keys = ["milestones"]
|
359
|
-
|
360
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
361
|
-
super().transform(record, stream_slice, **kwargs)
|
362
|
-
record["project_id"] = stream_slice["id"]
|
363
|
-
|
364
|
-
return record
|
365
|
-
|
366
|
-
|
367
|
-
class Tags(GitlabChildStream):
|
368
|
-
primary_key = "name"
|
369
|
-
flatten_id_keys = ["commit"]
|
370
|
-
|
371
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
372
|
-
super().transform(record, stream_slice, **kwargs)
|
373
|
-
record["project_id"] = stream_slice["id"]
|
374
|
-
|
375
|
-
return record
|
376
|
-
|
377
|
-
|
378
|
-
class Pipelines(IncrementalGitlabChildStream):
|
379
|
-
pass
|
380
|
-
|
381
|
-
|
382
|
-
class PipelinesExtended(GitlabChildStream):
|
383
|
-
path_list = ["project_id", "id"]
|
384
|
-
path_template = "projects/{project_id}/pipelines/{id}"
|
385
|
-
|
386
|
-
|
387
|
-
class Jobs(GitlabChildStream):
|
388
|
-
flatten_id_keys = ["user", "pipeline", "runner", "commit"]
|
389
|
-
path_list = ["project_id", "id"]
|
390
|
-
path_template = "projects/{project_id}/pipelines/{id}/jobs"
|
391
|
-
|
392
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
393
|
-
super().transform(record, stream_slice, **kwargs)
|
394
|
-
record["project_id"] = stream_slice["project_id"]
|
395
|
-
return record
|
396
|
-
|
397
|
-
|
398
|
-
class GroupIssueBoards(GitlabChildStream):
|
399
|
-
path_template = "groups/{id}/boards"
|
400
|
-
flatten_parent_id = True
|
401
|
-
|
402
|
-
|
403
|
-
class Users(GitlabChildStream):
|
404
|
-
pass
|
405
|
-
|
406
|
-
|
407
|
-
class Epics(GitlabChildStream):
|
408
|
-
primary_key = "iid"
|
409
|
-
flatten_id_keys = ["author"]
|
410
|
-
|
411
|
-
|
412
|
-
class EpicIssues(GitlabChildStream):
|
413
|
-
primary_key = "epic_issue_id"
|
414
|
-
path_list = ["group_id", "iid"]
|
415
|
-
flatten_id_keys = ["milestone", "assignee", "author"]
|
416
|
-
flatten_list_keys = ["assignees"]
|
417
|
-
path_template = "groups/{group_id}/epics/{iid}/issues"
|
418
|
-
|
419
|
-
|
420
|
-
class Deployments(GitlabChildStream):
|
421
|
-
primary_key = "id"
|
422
|
-
flatten_id_keys = ["user", "environment"]
|
423
|
-
path_template = "projects/{id}/deployments"
|
424
|
-
|
425
|
-
def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
|
426
|
-
super().transform(record, stream_slice, **kwargs)
|
427
|
-
record["user_username"] = record["user"]["username"]
|
428
|
-
record["user_full_name"] = record["user"]["name"]
|
429
|
-
record["environment_name"] = record["environment"]["name"]
|
430
|
-
record["project_id"] = stream_slice["id"]
|
431
|
-
return record
|
File without changes
|
File without changes
|