airbyte-source-gitlab 3.0.0.dev202403072311__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
source_gitlab/source.py CHANGED
@@ -1,245 +1,30 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
 
6
- import os
7
- from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
6
+ from typing import Any, MutableMapping, Tuple
8
7
 
9
- import pendulum
10
- from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
11
- from airbyte_cdk.models import SyncMode
12
- from airbyte_cdk.sources import AbstractSource
13
- from airbyte_cdk.sources.streams import Stream
14
- from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import SingleUseRefreshTokenOauth2Authenticator
15
- from airbyte_cdk.sources.streams.http.requests_native_auth.token import TokenAuthenticator
16
- from airbyte_cdk.utils import AirbyteTracedException
17
- from requests.auth import AuthBase
18
- from requests.exceptions import HTTPError
8
+ from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
9
+ from airbyte_cdk.utils import is_cloud_environment
19
10
 
20
- from .streams import (
21
- Branches,
22
- Commits,
23
- Deployments,
24
- EpicIssues,
25
- Epics,
26
- GitlabStream,
27
- GroupIssueBoards,
28
- GroupLabels,
29
- GroupMembers,
30
- GroupMilestones,
31
- GroupProjects,
32
- Groups,
33
- GroupsList,
34
- IncludeDescendantGroups,
35
- Issues,
36
- Jobs,
37
- MergeRequestCommits,
38
- MergeRequests,
39
- Pipelines,
40
- PipelinesExtended,
41
- ProjectLabels,
42
- ProjectMembers,
43
- ProjectMilestones,
44
- Projects,
45
- Releases,
46
- Tags,
47
- Users,
48
- )
49
11
  from .utils import parse_url
50
12
 
51
13
 
52
- class SingleUseRefreshTokenGitlabOAuth2Authenticator(SingleUseRefreshTokenOauth2Authenticator):
53
- def __init__(self, *args, created_at_name: str = "created_at", **kwargs):
54
- super().__init__(*args, **kwargs)
55
- self._created_at_name = created_at_name
56
-
57
- def get_created_at_name(self) -> str:
58
- return self._created_at_name
59
-
60
- def get_access_token(self) -> str:
61
- if self.token_has_expired():
62
- new_access_token, access_token_expires_in, access_token_created_at, new_refresh_token = self.refresh_access_token()
63
- new_token_expiry_date = self.get_new_token_expiry_date(access_token_expires_in, access_token_created_at)
64
- self.access_token = new_access_token
65
- self.set_refresh_token(new_refresh_token)
66
- self.set_token_expiry_date(new_token_expiry_date)
67
- emit_configuration_as_airbyte_control_message(self._connector_config)
68
- return self.access_token
69
-
70
- @staticmethod
71
- def get_new_token_expiry_date(access_token_expires_in: int, access_token_created_at: int) -> pendulum.DateTime:
72
- return pendulum.from_timestamp(access_token_created_at + access_token_expires_in)
73
-
74
- def refresh_access_token(self) -> Tuple[str, int, int, str]:
75
- response_json = self._get_refresh_access_token_response()
76
- return (
77
- response_json[self.get_access_token_name()],
78
- response_json[self.get_expires_in_name()],
79
- response_json[self.get_created_at_name()],
80
- response_json[self.get_refresh_token_name()],
81
- )
82
-
83
-
84
- def get_authenticator(config: MutableMapping) -> AuthBase:
85
- if config["credentials"]["auth_type"] == "access_token":
86
- return TokenAuthenticator(token=config["credentials"]["access_token"])
87
- return SingleUseRefreshTokenGitlabOAuth2Authenticator(
88
- config,
89
- token_refresh_endpoint=f"https://{config['api_url']}/oauth/token",
90
- refresh_token_error_status_codes=(400,),
91
- refresh_token_error_key="error",
92
- refresh_token_error_values="invalid_grant",
93
- )
94
-
95
-
96
- class SourceGitlab(AbstractSource):
97
- def __init__(self, *args, **kwargs):
98
- super().__init__(*args, **kwargs)
99
- self.__auth_params: Mapping[str, Any] = {}
100
- self.__groups_stream: Optional[GitlabStream] = None
101
- self.__projects_stream: Optional[GitlabStream] = None
14
+ class SourceGitlab(YamlDeclarativeSource):
15
+ def __init__(self):
16
+ super().__init__(**{"path_to_yaml": "manifest.yaml"})
102
17
 
103
18
  @staticmethod
104
19
  def _ensure_default_values(config: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
105
20
  config["api_url"] = config.get("api_url") or "gitlab.com"
106
21
  return config
107
22
 
108
- def _groups_stream(self, config: MutableMapping[str, Any]) -> Groups:
109
- if not self.__groups_stream:
110
- auth_params = self._auth_params(config)
111
- group_ids = list(map(lambda x: x["id"], self._get_group_list(config)))
112
- self.__groups_stream = Groups(group_ids=group_ids, **auth_params)
113
- return self.__groups_stream
114
-
115
- def _projects_stream(self, config: MutableMapping[str, Any]) -> Union[Projects, GroupProjects]:
116
- if not self.__projects_stream:
117
- auth_params = self._auth_params(config)
118
- project_ids = config.get("projects_list", [])
119
- groups_stream = self._groups_stream(config)
120
- if groups_stream.group_ids:
121
- self.__projects_stream = GroupProjects(project_ids=project_ids, parent_stream=groups_stream, **auth_params)
122
- return self.__projects_stream
123
- self.__projects_stream = Projects(project_ids=project_ids, **auth_params)
124
- return self.__projects_stream
125
-
126
- def _auth_params(self, config: MutableMapping[str, Any]) -> Mapping[str, Any]:
127
- if not self.__auth_params:
128
- auth = get_authenticator(config)
129
- self.__auth_params = dict(authenticator=auth, api_url=config["api_url"])
130
- return self.__auth_params
131
-
132
- def _get_group_list(self, config: MutableMapping[str, Any]) -> List[str]:
133
- group_ids = config.get("groups_list")
134
- # Gitlab exposes different APIs to get a list of groups.
135
- # We use https://docs.gitlab.com/ee/api/groups.html#list-groups in case there's no group IDs in the input config.
136
- # This API provides full information about all available groups, including subgroups.
137
- #
138
- # In case there is a definitive list of groups IDs in the input config, the above API can not be used since
139
- # it does not support filtering by group ID, so we use
140
- # https://docs.gitlab.com/ee/api/groups.html#details-of-a-group and
141
- # https: //docs.gitlab.com/ee/api/groups.html#list-a-groups-descendant-groups for each group ID. The latter one does not
142
- # provide full group info so can only be used to retrieve alist of group IDs and pass it further to init a corresponding stream.
143
- auth_params = self._auth_params(config)
144
- stream = GroupsList(**auth_params) if not group_ids else IncludeDescendantGroups(group_ids=group_ids, **auth_params)
145
- for stream_slice in stream.stream_slices(sync_mode=SyncMode.full_refresh):
146
- yield from stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
147
-
148
- @staticmethod
149
- def _is_http_allowed() -> bool:
150
- return os.environ.get("DEPLOYMENT_MODE", "").upper() != "CLOUD"
151
-
152
- def _try_refresh_access_token(self, logger, config: Mapping[str, Any]) -> Mapping[str, Any]:
153
- """
154
- This method attempts to refresh the expired `access_token`, while `refresh_token` is still valid.
155
- In order to obtain the new `refresh_token`, the Customer should `re-auth` in the source settings.
156
- """
157
- # get current authenticator
158
- authenticator: Union[SingleUseRefreshTokenOauth2Authenticator, TokenAuthenticator] = self.__auth_params.get("authenticator")
159
- if isinstance(authenticator, SingleUseRefreshTokenOauth2Authenticator):
160
- try:
161
- creds = authenticator.refresh_access_token()
162
- # update the actual config values
163
- config["credentials"]["access_token"] = creds[0]
164
- config["credentials"]["refresh_token"] = creds[3]
165
- config["credentials"]["token_expiry_date"] = authenticator.get_new_token_expiry_date(creds[1], creds[2]).to_rfc3339_string()
166
- # update the config
167
- emit_configuration_as_airbyte_control_message(config)
168
- logger.info("The `access_token` was successfully refreshed.")
169
- return config
170
- except (AirbyteTracedException, HTTPError) as http_error:
171
- raise http_error
172
- except Exception as e:
173
- raise Exception(f"Unknown error occurred while refreshing the `access_token`, details: {e}")
174
-
175
- def _handle_expired_access_token_error(self, logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
176
- try:
177
- return self.check_connection(logger, self._try_refresh_access_token(logger, config))
178
- except HTTPError as http_error:
179
- return False, f"Unable to refresh the `access_token`, please re-authenticate in Sources > Settings. Details: {http_error}"
180
-
181
23
  def check_connection(self, logger, config) -> Tuple[bool, Any]:
182
24
  config = self._ensure_default_values(config)
183
25
  is_valid, scheme, _ = parse_url(config["api_url"])
184
26
  if not is_valid:
185
27
  return False, "Invalid API resource locator."
186
- if scheme == "http" and not self._is_http_allowed():
28
+ if scheme == "http" and is_cloud_environment():
187
29
  return False, "Http scheme is not allowed in this environment. Please use `https` instead."
188
- try:
189
- projects = self._projects_stream(config)
190
- for stream_slice in projects.stream_slices(sync_mode=SyncMode.full_refresh):
191
- try:
192
- next(projects.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
193
- return True, None
194
- except StopIteration:
195
- # in case groups/projects provided and 404 occurs
196
- return False, "Groups and/or projects that you provide are invalid or you don't have permission to view it."
197
- return True, None # in case there's no projects
198
- except HTTPError as http_error:
199
- if config["credentials"]["auth_type"] == "oauth2.0":
200
- if http_error.response.status_code == 401:
201
- return self._handle_expired_access_token_error(logger, config)
202
- elif http_error.response.status_code == 500:
203
- return False, f"Unable to connect to Gitlab API with the provided credentials - {repr(http_error)}"
204
- else:
205
- return False, f"Unable to connect to Gitlab API with the provided Private Access Token - {repr(http_error)}"
206
- except Exception as error:
207
- return False, f"Unknown error occurred while checking the connection - {repr(error)}"
208
-
209
- def streams(self, config: MutableMapping[str, Any]) -> List[Stream]:
210
- config = self._ensure_default_values(config)
211
- auth_params = self._auth_params(config)
212
- start_date = config.get("start_date")
213
-
214
- groups, projects = self._groups_stream(config), self._projects_stream(config)
215
- pipelines = Pipelines(parent_stream=projects, start_date=start_date, **auth_params)
216
- merge_requests = MergeRequests(parent_stream=projects, start_date=start_date, **auth_params)
217
- epics = Epics(parent_stream=groups, **auth_params)
218
-
219
- streams = [
220
- groups,
221
- projects,
222
- Branches(parent_stream=projects, repository_part=True, **auth_params),
223
- Commits(parent_stream=projects, repository_part=True, start_date=start_date, **auth_params),
224
- epics,
225
- Deployments(parent_stream=projects, **auth_params),
226
- EpicIssues(parent_stream=epics, **auth_params),
227
- GroupIssueBoards(parent_stream=groups, **auth_params),
228
- Issues(parent_stream=projects, start_date=start_date, **auth_params),
229
- Jobs(parent_stream=pipelines, **auth_params),
230
- ProjectMilestones(parent_stream=projects, **auth_params),
231
- GroupMilestones(parent_stream=groups, **auth_params),
232
- ProjectMembers(parent_stream=projects, **auth_params),
233
- GroupMembers(parent_stream=groups, **auth_params),
234
- ProjectLabels(parent_stream=projects, **auth_params),
235
- GroupLabels(parent_stream=groups, **auth_params),
236
- merge_requests,
237
- MergeRequestCommits(parent_stream=merge_requests, **auth_params),
238
- Releases(parent_stream=projects, **auth_params),
239
- Tags(parent_stream=projects, repository_part=True, **auth_params),
240
- pipelines,
241
- PipelinesExtended(parent_stream=pipelines, **auth_params),
242
- Users(parent_stream=projects, **auth_params),
243
- ]
244
-
245
- return streams
30
+ return super().check_connection(logger, config)
source_gitlab/streams.py DELETED
@@ -1,431 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import datetime
6
- from abc import ABC
7
- from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple
8
-
9
- import pendulum
10
- import requests
11
- from airbyte_cdk.models import SyncMode
12
- from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
13
- from airbyte_cdk.sources.streams.core import StreamData
14
- from airbyte_cdk.sources.streams.http import HttpStream
15
-
16
- from .utils import parse_url
17
-
18
-
19
- class GitlabStream(HttpStream, ABC):
20
- primary_key = "id"
21
- raise_on_http_errors = True
22
- stream_base_params = {}
23
- flatten_id_keys = []
24
- flatten_list_keys = []
25
- per_page = 50
26
- non_retriable_codes: List[int] = (403, 404)
27
-
28
- def __init__(self, api_url: str, **kwargs):
29
- super().__init__(**kwargs)
30
- self.api_url = api_url
31
- self.page = 1
32
-
33
- def read_records(
34
- self,
35
- sync_mode: SyncMode,
36
- cursor_field: List[str] = None,
37
- stream_slice: Mapping[str, Any] = None,
38
- stream_state: Mapping[str, Any] = None,
39
- ) -> Iterable[StreamData]:
40
- self.page = 1
41
- yield from super().read_records(sync_mode, cursor_field, stream_slice, stream_state)
42
-
43
- def request_params(
44
- self,
45
- stream_state: Mapping[str, Any],
46
- stream_slice: Mapping[str, Any] = None,
47
- next_page_token: Mapping[str, Any] = None,
48
- ) -> MutableMapping[str, Any]:
49
- params = {"per_page": self.per_page}
50
- if next_page_token:
51
- params.update(next_page_token)
52
- params.update(self.stream_base_params)
53
- return params
54
-
55
- @property
56
- def url_base(self) -> str:
57
- _, scheme, host = parse_url(self.api_url)
58
- return f"{scheme}://{host}/api/v4/"
59
-
60
- @property
61
- def availability_strategy(self) -> Optional["AvailabilityStrategy"]:
62
- return None
63
-
64
- def should_retry(self, response: requests.Response) -> bool:
65
- # Gitlab API returns a 403 response in case a feature is disabled in a project (pipelines/jobs for instance).
66
- if response.status_code in self.non_retriable_codes:
67
- setattr(self, "raise_on_http_errors", False)
68
- self.logger.warning(
69
- f"Got {response.status_code} error when accessing URL {response.request.url}."
70
- f" Very likely the feature is disabled for this project and/or group. Please double check it, or report a bug otherwise."
71
- )
72
- return False
73
- return super().should_retry(response)
74
-
75
- def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
76
- if response.status_code in self.non_retriable_codes:
77
- return
78
- response_data = response.json()
79
- if isinstance(response_data, dict):
80
- return None
81
- if len(response_data) == self.per_page:
82
- self.page += 1
83
- return {"page": self.page}
84
-
85
- def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
86
- if response.status_code in self.non_retriable_codes:
87
- return []
88
- response_data = response.json()
89
- if isinstance(response_data, list):
90
- for record in response_data:
91
- yield self.transform(record, **kwargs)
92
- elif isinstance(response_data, dict):
93
- yield self.transform(response_data, **kwargs)
94
- else:
95
- self.logger.info(f"Unsupported type of response data for stream {self.name}")
96
-
97
- def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
98
- for key in self.flatten_id_keys:
99
- self._flatten_id(record, key)
100
-
101
- for key in self.flatten_list_keys:
102
- self._flatten_list(record, key)
103
-
104
- return record
105
-
106
- def _flatten_id(self, record: Dict[str, Any], target: str):
107
- target_value = record.get(target, None)
108
- record[target + "_id"] = target_value.get("id") if target_value else None
109
-
110
- def _flatten_list(self, record: Dict[str, Any], target: str):
111
- record[target] = [target_data.get("id") for target_data in record.get(target, [])]
112
-
113
-
114
- class GitlabChildStream(GitlabStream):
115
- path_list = ["id"]
116
- flatten_parent_id = False
117
-
118
- def __init__(self, parent_stream: GitlabStream, repository_part: bool = False, **kwargs):
119
- super().__init__(**kwargs)
120
- self.parent_stream = parent_stream
121
- self.repo_url = repository_part
122
-
123
- @property
124
- def path_template(self) -> str:
125
- template = [self.parent_stream.name] + ["{" + path_key + "}" for path_key in self.path_list]
126
- if self.repo_url:
127
- template.append("repository")
128
- return "/".join(template + [self.name])
129
-
130
- def stream_slices(
131
- self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
132
- ) -> Iterable[Optional[Mapping[str, any]]]:
133
- for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
134
- for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
135
- yield {path_key: record[path_key] for path_key in self.path_list}
136
-
137
- def path(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> str:
138
- return self.path_template.format(**{path_key: stream_slice[path_key] for path_key in self.path_list})
139
-
140
- def transform(self, record: Dict[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs):
141
- super().transform(record, stream_slice, **kwargs)
142
- if self.flatten_parent_id:
143
- record[f"{self.parent_stream.name[:-1]}_id"] = stream_slice["id"]
144
- return record
145
-
146
-
147
- class IncrementalGitlabChildStream(GitlabChildStream):
148
- state_checkpoint_interval = 100
149
- cursor_field = "updated_at"
150
- lower_bound_filter = "updated_after"
151
- upper_bound_filter = "updated_before"
152
-
153
- def __init__(self, start_date, **kwargs):
154
- super().__init__(**kwargs)
155
- self._start_date = start_date
156
-
157
- def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
158
- """
159
- Return the latest state by comparing the cursor value in the latest record with the stream's most recent state object
160
- and returning an updated state object.
161
- """
162
- project_id = latest_record.get("project_id")
163
- latest_cursor_value = latest_record.get(self.cursor_field)
164
- current_state = current_stream_state.get(str(project_id))
165
- if current_state:
166
- current_state = current_state.get(self.cursor_field)
167
- current_state_value = current_state or latest_cursor_value
168
- max_value = max(pendulum.parse(current_state_value), pendulum.parse(latest_cursor_value))
169
- current_stream_state[str(project_id)] = {self.cursor_field: max_value.to_iso8601_string()}
170
- return current_stream_state
171
-
172
- @staticmethod
173
- def _chunk_date_range(start_point: datetime.datetime) -> Iterable[Tuple[str, str]]:
174
- end_point = datetime.datetime.now(datetime.timezone.utc)
175
- if start_point > end_point:
176
- return []
177
- current_start, current_end = start_point, start_point
178
- while current_end < end_point:
179
- current_end = current_start + datetime.timedelta(days=180)
180
- current_end = min(current_end, end_point)
181
- yield str(current_start), str(current_end)
182
- current_start = current_end + datetime.timedelta(seconds=1)
183
-
184
- def stream_slices(
185
- self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
186
- ) -> Iterable[Optional[Mapping[str, Any]]]:
187
- stream_state = stream_state or {}
188
- super_slices = super().stream_slices(sync_mode, cursor_field, stream_state)
189
- for super_slice in super_slices:
190
- state_project_value = stream_state.get(str(super_slice["id"]))
191
- if self._start_date or state_project_value:
192
- start_point = self._start_date
193
- if state_project_value:
194
- state_value = state_project_value.get(self.cursor_field)
195
- if state_value and start_point:
196
- start_point = max(start_point, state_value)
197
- else:
198
- start_point = state_value or start_point
199
- for start_dt, end_dt in self._chunk_date_range(pendulum.parse(start_point)):
200
- stream_slice = {key: value for key, value in super_slice.items()}
201
- stream_slice[self.lower_bound_filter] = start_dt
202
- stream_slice[self.upper_bound_filter] = end_dt
203
- yield stream_slice
204
- else:
205
- stream_slice = {key: value for key, value in super_slice.items()}
206
- yield stream_slice
207
-
208
- def request_params(self, stream_state=None, stream_slice: Mapping[str, Any] = None, **kwargs):
209
- params = super().request_params(stream_state, stream_slice, **kwargs)
210
- lower_bound_filter = stream_slice.get(self.lower_bound_filter)
211
- upper_bound_filter = stream_slice.get(self.upper_bound_filter)
212
- if lower_bound_filter and upper_bound_filter:
213
- params[self.lower_bound_filter] = lower_bound_filter
214
- params[self.upper_bound_filter] = upper_bound_filter
215
- return params
216
-
217
-
218
- class Groups(GitlabStream):
219
- use_cache = True
220
-
221
- def __init__(self, group_ids: List, **kwargs):
222
- super().__init__(**kwargs)
223
- self.group_ids = group_ids
224
-
225
- def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
226
- return f"groups/{stream_slice['id']}"
227
-
228
- def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
229
- for gid in self.group_ids:
230
- yield {"id": gid}
231
-
232
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
233
- record["projects"] = [
234
- {"id": project["id"], "path_with_namespace": project["path_with_namespace"]} for project in record.pop("projects", [])
235
- ]
236
- return record
237
-
238
-
239
- class IncludeDescendantGroups(Groups):
240
- def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
241
- return stream_slice["path"]
242
-
243
- def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
244
- for gid in self.group_ids:
245
- yield {"path": f"groups/{gid}"}
246
- yield {"path": f"groups/{gid}/descendant_groups"}
247
-
248
-
249
- class GroupsList(GitlabStream):
250
- def path(self, **kwargs) -> str:
251
- return "groups"
252
-
253
-
254
- class Projects(GitlabStream):
255
- stream_base_params = {"statistics": 1}
256
- use_cache = True
257
-
258
- def __init__(self, project_ids: List = None, **kwargs):
259
- super().__init__(**kwargs)
260
- self.project_ids = project_ids
261
-
262
- def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
263
- return f"projects/{stream_slice['id']}"
264
-
265
- def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
266
- for pid in self.project_ids:
267
- yield {"id": pid.replace("/", "%2F")}
268
-
269
-
270
- class GroupProjects(Projects):
271
- name = "projects"
272
-
273
- def __init__(self, parent_stream: GitlabStream = None, **kwargs):
274
- super().__init__(**kwargs)
275
- self.parent_stream = parent_stream
276
-
277
- def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
278
- group_project_ids = set()
279
- for slice in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh):
280
- for record in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice):
281
- group_project_ids.update({i["path_with_namespace"] for i in record["projects"]})
282
- for pid in group_project_ids:
283
- if not self.project_ids or self.project_ids and pid in self.project_ids:
284
- yield {"id": pid.replace("/", "%2F")}
285
-
286
-
287
- class GroupMilestones(GitlabChildStream):
288
- path_template = "groups/{id}/milestones"
289
-
290
-
291
- class ProjectMilestones(GitlabChildStream):
292
- path_template = "projects/{id}/milestones"
293
-
294
-
295
- class GroupMembers(GitlabChildStream):
296
- path_template = "groups/{id}/members"
297
- flatten_parent_id = True
298
-
299
-
300
- class ProjectMembers(GitlabChildStream):
301
- path_template = "projects/{id}/members"
302
- flatten_parent_id = True
303
-
304
-
305
- class GroupLabels(GitlabChildStream):
306
- path_template = "groups/{id}/labels"
307
- flatten_parent_id = True
308
-
309
-
310
- class ProjectLabels(GitlabChildStream):
311
- path_template = "projects/{id}/labels"
312
- flatten_parent_id = True
313
-
314
-
315
- class Branches(GitlabChildStream):
316
- primary_key = "name"
317
- flatten_id_keys = ["commit"]
318
- flatten_parent_id = True
319
-
320
-
321
- class Commits(IncrementalGitlabChildStream):
322
- cursor_field = "created_at"
323
- lower_bound_filter = "since"
324
- upper_bound_filter = "until"
325
- flatten_parent_id = True
326
- stream_base_params = {"with_stats": True}
327
-
328
-
329
- class Issues(IncrementalGitlabChildStream):
330
- stream_base_params = {"scope": "all"}
331
- flatten_id_keys = ["author", "assignee", "closed_by", "milestone"]
332
- flatten_list_keys = ["assignees"]
333
-
334
-
335
- class MergeRequests(IncrementalGitlabChildStream):
336
- stream_base_params = {"scope": "all"}
337
- flatten_id_keys = ["author", "assignee", "closed_by", "milestone", "merged_by"]
338
- flatten_list_keys = ["assignees"]
339
-
340
-
341
- class MergeRequestCommits(GitlabChildStream):
342
- """Docs: https://docs.gitlab.com/ee/api/merge_requests.html#get-single-merge-request-commits"""
343
-
344
- path_list = ["project_id", "iid"]
345
- path_template = "projects/{project_id}/merge_requests/{iid}/commits"
346
-
347
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
348
- super().transform(record, stream_slice, **kwargs)
349
- record["project_id"] = stream_slice["project_id"]
350
- record["merge_request_iid"] = stream_slice["iid"]
351
-
352
- return record
353
-
354
-
355
- class Releases(GitlabChildStream):
356
- primary_key = "name"
357
- flatten_id_keys = ["author", "commit"]
358
- flatten_list_keys = ["milestones"]
359
-
360
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
361
- super().transform(record, stream_slice, **kwargs)
362
- record["project_id"] = stream_slice["id"]
363
-
364
- return record
365
-
366
-
367
- class Tags(GitlabChildStream):
368
- primary_key = "name"
369
- flatten_id_keys = ["commit"]
370
-
371
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
372
- super().transform(record, stream_slice, **kwargs)
373
- record["project_id"] = stream_slice["id"]
374
-
375
- return record
376
-
377
-
378
- class Pipelines(IncrementalGitlabChildStream):
379
- pass
380
-
381
-
382
- class PipelinesExtended(GitlabChildStream):
383
- path_list = ["project_id", "id"]
384
- path_template = "projects/{project_id}/pipelines/{id}"
385
-
386
-
387
- class Jobs(GitlabChildStream):
388
- flatten_id_keys = ["user", "pipeline", "runner", "commit"]
389
- path_list = ["project_id", "id"]
390
- path_template = "projects/{project_id}/pipelines/{id}/jobs"
391
-
392
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
393
- super().transform(record, stream_slice, **kwargs)
394
- record["project_id"] = stream_slice["project_id"]
395
- return record
396
-
397
-
398
- class GroupIssueBoards(GitlabChildStream):
399
- path_template = "groups/{id}/boards"
400
- flatten_parent_id = True
401
-
402
-
403
- class Users(GitlabChildStream):
404
- pass
405
-
406
-
407
- class Epics(GitlabChildStream):
408
- primary_key = "iid"
409
- flatten_id_keys = ["author"]
410
-
411
-
412
- class EpicIssues(GitlabChildStream):
413
- primary_key = "epic_issue_id"
414
- path_list = ["group_id", "iid"]
415
- flatten_id_keys = ["milestone", "assignee", "author"]
416
- flatten_list_keys = ["assignees"]
417
- path_template = "groups/{group_id}/epics/{iid}/issues"
418
-
419
-
420
- class Deployments(GitlabChildStream):
421
- primary_key = "id"
422
- flatten_id_keys = ["user", "environment"]
423
- path_template = "projects/{id}/deployments"
424
-
425
- def transform(self, record, stream_slice: Mapping[str, Any] = None, **kwargs):
426
- super().transform(record, stream_slice, **kwargs)
427
- record["user_username"] = record["user"]["username"]
428
- record["user_full_name"] = record["user"]["name"]
429
- record["environment_name"] = record["environment"]["name"]
430
- record["project_id"] = stream_slice["id"]
431
- return record