airbyte-source-github 1.7.0__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_github-1.7.0.dist-info → airbyte_source_github-1.7.1.dist-info}/METADATA +1 -1
- {airbyte_source_github-1.7.0.dist-info → airbyte_source_github-1.7.1.dist-info}/RECORD +7 -7
- source_github/source.py +17 -52
- source_github/spec.json +7 -6
- source_github/streams.py +40 -5
- {airbyte_source_github-1.7.0.dist-info → airbyte_source_github-1.7.1.dist-info}/WHEEL +0 -0
- {airbyte_source_github-1.7.0.dist-info → airbyte_source_github-1.7.1.dist-info}/entry_points.txt +0 -0
@@ -52,11 +52,11 @@ source_github/schemas/users.json,sha256=xASJmm56AqLYxSCfn5qlPy0xUVJOW8K3gWlwRr4J
|
|
52
52
|
source_github/schemas/workflow_jobs.json,sha256=ORowQYqvJhJE2EEV1jXyQSPCFmtO6NyhJZGTgpXte1Q,2089
|
53
53
|
source_github/schemas/workflow_runs.json,sha256=JWK1p1HQI2dDnutF4rd7gPG7Nx1_RJL2VXIka4KQwMQ,10171
|
54
54
|
source_github/schemas/workflows.json,sha256=zvtOslS-veNo5_iXmMxMNlY8OOt8DdvTZ3hjtdJbdvY,753
|
55
|
-
source_github/source.py,sha256=
|
56
|
-
source_github/spec.json,sha256=
|
57
|
-
source_github/streams.py,sha256=
|
55
|
+
source_github/source.py,sha256=jOGHJLL6ys4NRAjGs-Lw1RJxK25NxA5XS8n-uH2Gico,13879
|
56
|
+
source_github/spec.json,sha256=tsRjwqInYQjvqhm-Yzdn7_VC5QyInCGAToFJnUrCnOU,7074
|
57
|
+
source_github/streams.py,sha256=vcP2P0vCvShaa3z8M9dnmbY6awKR8vAIw3XgR8PlZtk,77006
|
58
58
|
source_github/utils.py,sha256=DfAHFjsF8hzDXeSCR6qtfs7W_av6o2BkkEVhtHpWbis,5462
|
59
|
-
airbyte_source_github-1.7.
|
60
|
-
airbyte_source_github-1.7.
|
61
|
-
airbyte_source_github-1.7.
|
62
|
-
airbyte_source_github-1.7.
|
59
|
+
airbyte_source_github-1.7.1.dist-info/METADATA,sha256=fr12eeFOb6XmyXnxebMc5CnwuBKsQZieK9wmbGt_aEo,5228
|
60
|
+
airbyte_source_github-1.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
61
|
+
airbyte_source_github-1.7.1.dist-info/entry_points.txt,sha256=gYhqVrTAZvMwuYByg0b_-o115yUFLLcfNxMrLZmiW9k,55
|
62
|
+
airbyte_source_github-1.7.1.dist-info/RECORD,,
|
source_github/source.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from os import getenv
|
6
|
-
from typing import Any,
|
6
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple
|
7
7
|
from urllib.parse import urlparse
|
8
8
|
|
9
9
|
from airbyte_cdk import AirbyteLogger
|
@@ -65,7 +65,9 @@ class SourceGithub(AbstractSource):
|
|
65
65
|
continue_sync_on_stream_failure = True
|
66
66
|
|
67
67
|
@staticmethod
|
68
|
-
def _get_org_repositories(
|
68
|
+
def _get_org_repositories(
|
69
|
+
config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator
|
70
|
+
) -> Tuple[List[str], List[str], Optional[str]]:
|
69
71
|
"""
|
70
72
|
Parse config/repositories and produce two lists (organizations, repositories) plus the optional regex pattern built from wildcard entries.
|
71
73
|
Args:
|
@@ -78,16 +80,19 @@ class SourceGithub(AbstractSource):
|
|
78
80
|
organizations = set()
|
79
81
|
unchecked_repos = set()
|
80
82
|
unchecked_orgs = set()
|
83
|
+
pattern = None
|
81
84
|
|
82
85
|
for org_repos in config_repositories:
|
83
|
-
|
84
|
-
if
|
85
|
-
unchecked_orgs.add(
|
86
|
+
_, _, repos = org_repos.partition("/")
|
87
|
+
if "*" in repos:
|
88
|
+
unchecked_orgs.add(org_repos)
|
86
89
|
else:
|
87
90
|
unchecked_repos.add(org_repos)
|
88
91
|
|
89
92
|
if unchecked_orgs:
|
90
|
-
|
93
|
+
org_names = [org.split("/")[0] for org in unchecked_orgs]
|
94
|
+
pattern = "|".join([f"({org.replace('*', '.*')})" for org in unchecked_orgs])
|
95
|
+
stream = Repositories(authenticator=authenticator, organizations=org_names, api_url=config.get("api_url"), pattern=pattern)
|
91
96
|
for record in read_full_refresh(stream):
|
92
97
|
repositories.add(record["full_name"])
|
93
98
|
organizations.add(record["organization"])
|
@@ -96,7 +101,7 @@ class SourceGithub(AbstractSource):
|
|
96
101
|
if unchecked_repos:
|
97
102
|
stream = RepositoryStats(
|
98
103
|
authenticator=authenticator,
|
99
|
-
repositories=unchecked_repos,
|
104
|
+
repositories=list(unchecked_repos),
|
100
105
|
api_url=config.get("api_url"),
|
101
106
|
# This parameter is deprecated; in the future a sane default (page_size: 10) will be used instead
|
102
107
|
page_size_for_large_streams=config.get("page_size_for_large_streams", constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM),
|
@@ -107,7 +112,7 @@ class SourceGithub(AbstractSource):
|
|
107
112
|
if organization:
|
108
113
|
organizations.add(organization)
|
109
114
|
|
110
|
-
return list(organizations), list(repositories)
|
115
|
+
return list(organizations), list(repositories), pattern
|
111
116
|
|
112
117
|
@staticmethod
|
113
118
|
def get_access_token(config: Mapping[str, Any]):
|
@@ -169,45 +174,6 @@ class SourceGithub(AbstractSource):
|
|
169
174
|
def _is_http_allowed() -> bool:
|
170
175
|
return getenv("DEPLOYMENT_MODE", "").upper() != "CLOUD"
|
171
176
|
|
172
|
-
@staticmethod
|
173
|
-
def _get_branches_data(
|
174
|
-
selected_branches: List, full_refresh_args: Dict[str, Any] = None
|
175
|
-
) -> Tuple[Dict[str, str], Dict[str, List[str]]]:
|
176
|
-
selected_branches = set(selected_branches)
|
177
|
-
|
178
|
-
# Get the default branch for each repository
|
179
|
-
default_branches = {}
|
180
|
-
repository_stats_stream = RepositoryStats(**full_refresh_args)
|
181
|
-
for stream_slice in repository_stats_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
182
|
-
default_branches.update(
|
183
|
-
{
|
184
|
-
repo_stats["full_name"]: repo_stats["default_branch"]
|
185
|
-
for repo_stats in repository_stats_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
|
186
|
-
}
|
187
|
-
)
|
188
|
-
|
189
|
-
all_branches = []
|
190
|
-
branches_stream = Branches(**full_refresh_args)
|
191
|
-
for stream_slice in branches_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
192
|
-
for branch in branches_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
|
193
|
-
all_branches.append(f"{branch['repository']}/{branch['name']}")
|
194
|
-
|
195
|
-
# Create mapping of repository to list of branches to pull commits for
|
196
|
-
# If no branches are specified for a repo, use its default branch
|
197
|
-
branches_to_pull: Dict[str, List[str]] = {}
|
198
|
-
for repo in full_refresh_args["repositories"]:
|
199
|
-
repo_branches = []
|
200
|
-
for branch in selected_branches:
|
201
|
-
branch_parts = branch.split("/", 2)
|
202
|
-
if "/".join(branch_parts[:2]) == repo and branch in all_branches:
|
203
|
-
repo_branches.append(branch_parts[-1])
|
204
|
-
if not repo_branches:
|
205
|
-
repo_branches = [default_branches[repo]]
|
206
|
-
|
207
|
-
branches_to_pull[repo] = repo_branches
|
208
|
-
|
209
|
-
return default_branches, branches_to_pull
|
210
|
-
|
211
177
|
def user_friendly_error_message(self, message: str) -> str:
|
212
178
|
user_message = ""
|
213
179
|
if "404 Client Error: Not Found for url: https://api.github.com/repos/" in message:
|
@@ -229,7 +195,7 @@ class SourceGithub(AbstractSource):
|
|
229
195
|
config = self._validate_and_transform_config(config)
|
230
196
|
try:
|
231
197
|
authenticator = self._get_authenticator(config)
|
232
|
-
_, repositories = self._get_org_repositories(config=config, authenticator=authenticator)
|
198
|
+
_, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator)
|
233
199
|
if not repositories:
|
234
200
|
return (
|
235
201
|
False,
|
@@ -246,7 +212,7 @@ class SourceGithub(AbstractSource):
|
|
246
212
|
authenticator = self._get_authenticator(config)
|
247
213
|
config = self._validate_and_transform_config(config)
|
248
214
|
try:
|
249
|
-
organizations, repositories = self._get_org_repositories(config=config, authenticator=authenticator)
|
215
|
+
organizations, repositories, pattern = self._get_org_repositories(config=config, authenticator=authenticator)
|
250
216
|
except Exception as e:
|
251
217
|
message = repr(e)
|
252
218
|
user_message = self.user_friendly_error_message(message)
|
@@ -291,7 +257,6 @@ class SourceGithub(AbstractSource):
|
|
291
257
|
}
|
292
258
|
repository_args_with_start_date = {**repository_args, "start_date": start_date}
|
293
259
|
|
294
|
-
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", []), repository_args)
|
295
260
|
pull_requests_stream = PullRequests(**repository_args_with_start_date)
|
296
261
|
projects_stream = Projects(**repository_args_with_start_date)
|
297
262
|
project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
|
@@ -307,7 +272,7 @@ class SourceGithub(AbstractSource):
|
|
307
272
|
Comments(**repository_args_with_start_date),
|
308
273
|
CommitCommentReactions(**repository_args_with_start_date),
|
309
274
|
CommitComments(**repository_args_with_start_date),
|
310
|
-
Commits(**repository_args_with_start_date, branches_to_pull=
|
275
|
+
Commits(**repository_args_with_start_date, branches_to_pull=config.get("branches", [])),
|
311
276
|
ContributorActivity(**repository_args),
|
312
277
|
Deployments(**repository_args_with_start_date),
|
313
278
|
Events(**repository_args_with_start_date),
|
@@ -327,7 +292,7 @@ class SourceGithub(AbstractSource):
|
|
327
292
|
ProjectsV2(**repository_args_with_start_date),
|
328
293
|
pull_requests_stream,
|
329
294
|
Releases(**repository_args_with_start_date),
|
330
|
-
Repositories(**organization_args_with_start_date),
|
295
|
+
Repositories(**organization_args_with_start_date, pattern=pattern),
|
331
296
|
ReviewComments(**repository_args_with_start_date),
|
332
297
|
Reviews(**repository_args_with_start_date),
|
333
298
|
Stargazers(**repository_args_with_start_date),
|
source_github/spec.json
CHANGED
@@ -81,18 +81,19 @@
|
|
81
81
|
"type": "array",
|
82
82
|
"items": {
|
83
83
|
"type": "string",
|
84
|
-
"pattern": "^
|
84
|
+
"pattern": "^[\\w.-]+/(([\\w.-]*\\*)|[\\w.-]+(?<!\\.git))$"
|
85
85
|
},
|
86
86
|
"minItems": 1,
|
87
87
|
"examples": [
|
88
|
-
"airbytehq/airbyte
|
88
|
+
"airbytehq/airbyte",
|
89
|
+
"airbytehq/another-repo",
|
89
90
|
"airbytehq/*",
|
90
|
-
"airbytehq/
|
91
|
+
"airbytehq/a*"
|
91
92
|
],
|
92
93
|
"title": "GitHub Repositories",
|
93
|
-
"description": "List of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/*` for get all repositories from organization and `airbytehq/
|
94
|
+
"description": "List of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for a single repository, `airbytehq/*` to get all repositories from an organization and `airbytehq/a*` for matching multiple repositories by pattern.",
|
94
95
|
"order": 1,
|
95
|
-
"pattern_descriptor": "org/repo org/another-repo org/*"
|
96
|
+
"pattern_descriptor": "org/repo org/another-repo org/* org/a*"
|
96
97
|
},
|
97
98
|
"start_date": {
|
98
99
|
"type": "string",
|
@@ -126,7 +127,7 @@
|
|
126
127
|
"type": "string"
|
127
128
|
},
|
128
129
|
"title": "Branches",
|
129
|
-
"examples": ["airbytehq/airbyte/master airbytehq/airbyte/my-branch"],
|
130
|
+
"examples": ["airbytehq/airbyte/master", "airbytehq/airbyte/my-branch"],
|
130
131
|
"description": "List of GitHub repository branches to pull commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled.",
|
131
132
|
"order": 4,
|
132
133
|
"pattern_descriptor": "org/repo/branch1 org/repo/branch2"
|
source_github/streams.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
import re
|
5
6
|
import time
|
6
7
|
from abc import ABC, abstractmethod
|
7
8
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
|
@@ -441,12 +442,18 @@ class Repositories(SemiIncrementalMixin, Organizations):
|
|
441
442
|
"direction": "desc",
|
442
443
|
}
|
443
444
|
|
445
|
+
def __init__(self, *args, pattern: Optional[str] = None, **kwargs):
|
446
|
+
self._pattern = re.compile(pattern) if pattern else pattern
|
447
|
+
super().__init__(*args, **kwargs)
|
448
|
+
|
444
449
|
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
445
450
|
return f"orgs/{stream_slice['organization']}/repos"
|
446
451
|
|
447
452
|
def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
|
448
453
|
for record in response.json(): # GitHub puts records in an array.
|
449
|
-
|
454
|
+
record = self.transform(record=record, stream_slice=stream_slice)
|
455
|
+
if not self._pattern or self._pattern.match(record["full_name"]):
|
456
|
+
yield record
|
450
457
|
|
451
458
|
|
452
459
|
class Tags(GithubStream):
|
@@ -676,10 +683,13 @@ class Commits(IncrementalMixin, GithubStream):
|
|
676
683
|
cursor_field = "created_at"
|
677
684
|
slice_keys = ["repository", "branch"]
|
678
685
|
|
679
|
-
def __init__(self, branches_to_pull:
|
686
|
+
def __init__(self, branches_to_pull: List[str], **kwargs):
|
680
687
|
super().__init__(**kwargs)
|
681
|
-
|
682
|
-
self.
|
688
|
+
kwargs.pop("start_date")
|
689
|
+
self.branches_to_repos = {}
|
690
|
+
self.branches_to_pull = set(branches_to_pull)
|
691
|
+
self.branches_stream = Branches(**kwargs)
|
692
|
+
self.repositories_stream = RepositoryStats(**kwargs)
|
683
693
|
|
684
694
|
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
|
685
695
|
params = super(IncrementalMixin, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
|
@@ -690,9 +700,10 @@ class Commits(IncrementalMixin, GithubStream):
|
|
690
700
|
return params
|
691
701
|
|
692
702
|
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
|
703
|
+
self._validate_branches_to_pull()
|
693
704
|
for stream_slice in super().stream_slices(**kwargs):
|
694
705
|
repository = stream_slice["repository"]
|
695
|
-
for branch in self.
|
706
|
+
for branch in self.branches_to_repos.get(repository, []):
|
696
707
|
yield {"branch": branch, "repository": repository}
|
697
708
|
|
698
709
|
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
|
@@ -718,6 +729,30 @@ class Commits(IncrementalMixin, GithubStream):
|
|
718
729
|
current_stream_state.setdefault(repository, {}).setdefault(branch, {})[self.cursor_field] = updated_state
|
719
730
|
return current_stream_state
|
720
731
|
|
732
|
+
def _validate_branches_to_pull(self):
|
733
|
+
# Get the default branch for each repository
|
734
|
+
default_branches = {}
|
735
|
+
for stream_slice in self.repositories_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
736
|
+
for repo_stats in self.repositories_stream.read_records(stream_slice=stream_slice, sync_mode=SyncMode.full_refresh):
|
737
|
+
default_branches[repo_stats["full_name"]] = repo_stats["default_branch"]
|
738
|
+
|
739
|
+
all_branches = []
|
740
|
+
for stream_slice in self.branches_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
741
|
+
for branch in self.branches_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
|
742
|
+
all_branches.append(f"{branch['repository']}/{branch['name']}")
|
743
|
+
|
744
|
+
# Create mapping of repository to list of branches to pull commits for
|
745
|
+
# If no branches are specified for a repo, use its default branch
|
746
|
+
for repo in self.repositories:
|
747
|
+
repo_branches = []
|
748
|
+
for branch in self.branches_to_pull:
|
749
|
+
branch_parts = branch.split("/", 2)
|
750
|
+
if "/".join(branch_parts[:2]) == repo and branch in all_branches:
|
751
|
+
repo_branches.append(branch_parts[-1])
|
752
|
+
if not repo_branches:
|
753
|
+
repo_branches = [default_branches[repo]]
|
754
|
+
self.branches_to_repos[repo] = repo_branches
|
755
|
+
|
721
756
|
|
722
757
|
class Issues(IncrementalMixin, GithubStream):
|
723
758
|
"""
|
File without changes
|
{airbyte_source_github-1.7.0.dist-info → airbyte_source_github-1.7.1.dist-info}/entry_points.txt
RENAMED
File without changes
|