airbyte-source-github 1.6.6.dev202403201925__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_github-1.6.6.dev202403201925.dist-info → airbyte_source_github-1.7.1.dist-info}/METADATA +2 -2
- {airbyte_source_github-1.6.6.dev202403201925.dist-info → airbyte_source_github-1.7.1.dist-info}/RECORD +7 -7
- source_github/source.py +17 -52
- source_github/spec.json +7 -6
- source_github/streams.py +41 -7
- {airbyte_source_github-1.6.6.dev202403201925.dist-info → airbyte_source_github-1.7.1.dist-info}/WHEEL +0 -0
- {airbyte_source_github-1.6.6.dev202403201925.dist-info → airbyte_source_github-1.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-source-github
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.7.1
|
4
4
|
Summary: Source implementation for GitHub.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.9
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
15
|
-
Requires-Dist: airbyte-cdk (>=0
|
15
|
+
Requires-Dist: airbyte-cdk (>=0,<1)
|
16
16
|
Requires-Dist: sgqlc (==16.3)
|
17
17
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/github
|
18
18
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
@@ -52,11 +52,11 @@ source_github/schemas/users.json,sha256=xASJmm56AqLYxSCfn5qlPy0xUVJOW8K3gWlwRr4J
|
|
52
52
|
source_github/schemas/workflow_jobs.json,sha256=ORowQYqvJhJE2EEV1jXyQSPCFmtO6NyhJZGTgpXte1Q,2089
|
53
53
|
source_github/schemas/workflow_runs.json,sha256=JWK1p1HQI2dDnutF4rd7gPG7Nx1_RJL2VXIka4KQwMQ,10171
|
54
54
|
source_github/schemas/workflows.json,sha256=zvtOslS-veNo5_iXmMxMNlY8OOt8DdvTZ3hjtdJbdvY,753
|
55
|
-
source_github/source.py,sha256=
|
56
|
-
source_github/spec.json,sha256=
|
57
|
-
source_github/streams.py,sha256=
|
55
|
+
source_github/source.py,sha256=jOGHJLL6ys4NRAjGs-Lw1RJxK25NxA5XS8n-uH2Gico,13879
|
56
|
+
source_github/spec.json,sha256=tsRjwqInYQjvqhm-Yzdn7_VC5QyInCGAToFJnUrCnOU,7074
|
57
|
+
source_github/streams.py,sha256=vcP2P0vCvShaa3z8M9dnmbY6awKR8vAIw3XgR8PlZtk,77006
|
58
58
|
source_github/utils.py,sha256=DfAHFjsF8hzDXeSCR6qtfs7W_av6o2BkkEVhtHpWbis,5462
|
59
|
-
airbyte_source_github-1.
|
60
|
-
airbyte_source_github-1.
|
61
|
-
airbyte_source_github-1.
|
62
|
-
airbyte_source_github-1.
|
59
|
+
airbyte_source_github-1.7.1.dist-info/METADATA,sha256=fr12eeFOb6XmyXnxebMc5CnwuBKsQZieK9wmbGt_aEo,5228
|
60
|
+
airbyte_source_github-1.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
61
|
+
airbyte_source_github-1.7.1.dist-info/entry_points.txt,sha256=gYhqVrTAZvMwuYByg0b_-o115yUFLLcfNxMrLZmiW9k,55
|
62
|
+
airbyte_source_github-1.7.1.dist-info/RECORD,,
|
source_github/source.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from os import getenv
|
6
|
-
from typing import Any,
|
6
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple
|
7
7
|
from urllib.parse import urlparse
|
8
8
|
|
9
9
|
from airbyte_cdk import AirbyteLogger
|
@@ -65,7 +65,9 @@ class SourceGithub(AbstractSource):
|
|
65
65
|
continue_sync_on_stream_failure = True
|
66
66
|
|
67
67
|
@staticmethod
|
68
|
-
def _get_org_repositories(
|
68
|
+
def _get_org_repositories(
|
69
|
+
config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator
|
70
|
+
) -> Tuple[List[str], List[str], Optional[str]]:
|
69
71
|
"""
|
70
72
|
Parse config/repositories and produce two lists: organizations, repositories.
|
71
73
|
Args:
|
@@ -78,16 +80,19 @@ class SourceGithub(AbstractSource):
|
|
78
80
|
organizations = set()
|
79
81
|
unchecked_repos = set()
|
80
82
|
unchecked_orgs = set()
|
83
|
+
pattern = None
|
81
84
|
|
82
85
|
for org_repos in config_repositories:
|
83
|
-
|
84
|
-
if
|
85
|
-
unchecked_orgs.add(
|
86
|
+
_, _, repos = org_repos.partition("/")
|
87
|
+
if "*" in repos:
|
88
|
+
unchecked_orgs.add(org_repos)
|
86
89
|
else:
|
87
90
|
unchecked_repos.add(org_repos)
|
88
91
|
|
89
92
|
if unchecked_orgs:
|
90
|
-
|
93
|
+
org_names = [org.split("/")[0] for org in unchecked_orgs]
|
94
|
+
pattern = "|".join([f"({org.replace('*', '.*')})" for org in unchecked_orgs])
|
95
|
+
stream = Repositories(authenticator=authenticator, organizations=org_names, api_url=config.get("api_url"), pattern=pattern)
|
91
96
|
for record in read_full_refresh(stream):
|
92
97
|
repositories.add(record["full_name"])
|
93
98
|
organizations.add(record["organization"])
|
@@ -96,7 +101,7 @@ class SourceGithub(AbstractSource):
|
|
96
101
|
if unchecked_repos:
|
97
102
|
stream = RepositoryStats(
|
98
103
|
authenticator=authenticator,
|
99
|
-
repositories=unchecked_repos,
|
104
|
+
repositories=list(unchecked_repos),
|
100
105
|
api_url=config.get("api_url"),
|
101
106
|
# This parameter is deprecated and in future will be used sane default, page_size: 10
|
102
107
|
page_size_for_large_streams=config.get("page_size_for_large_streams", constants.DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM),
|
@@ -107,7 +112,7 @@ class SourceGithub(AbstractSource):
|
|
107
112
|
if organization:
|
108
113
|
organizations.add(organization)
|
109
114
|
|
110
|
-
return list(organizations), list(repositories)
|
115
|
+
return list(organizations), list(repositories), pattern
|
111
116
|
|
112
117
|
@staticmethod
|
113
118
|
def get_access_token(config: Mapping[str, Any]):
|
@@ -169,45 +174,6 @@ class SourceGithub(AbstractSource):
|
|
169
174
|
def _is_http_allowed() -> bool:
|
170
175
|
return getenv("DEPLOYMENT_MODE", "").upper() != "CLOUD"
|
171
176
|
|
172
|
-
@staticmethod
|
173
|
-
def _get_branches_data(
|
174
|
-
selected_branches: List, full_refresh_args: Dict[str, Any] = None
|
175
|
-
) -> Tuple[Dict[str, str], Dict[str, List[str]]]:
|
176
|
-
selected_branches = set(selected_branches)
|
177
|
-
|
178
|
-
# Get the default branch for each repository
|
179
|
-
default_branches = {}
|
180
|
-
repository_stats_stream = RepositoryStats(**full_refresh_args)
|
181
|
-
for stream_slice in repository_stats_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
182
|
-
default_branches.update(
|
183
|
-
{
|
184
|
-
repo_stats["full_name"]: repo_stats["default_branch"]
|
185
|
-
for repo_stats in repository_stats_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
|
186
|
-
}
|
187
|
-
)
|
188
|
-
|
189
|
-
all_branches = []
|
190
|
-
branches_stream = Branches(**full_refresh_args)
|
191
|
-
for stream_slice in branches_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
192
|
-
for branch in branches_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
|
193
|
-
all_branches.append(f"{branch['repository']}/{branch['name']}")
|
194
|
-
|
195
|
-
# Create mapping of repository to list of branches to pull commits for
|
196
|
-
# If no branches are specified for a repo, use its default branch
|
197
|
-
branches_to_pull: Dict[str, List[str]] = {}
|
198
|
-
for repo in full_refresh_args["repositories"]:
|
199
|
-
repo_branches = []
|
200
|
-
for branch in selected_branches:
|
201
|
-
branch_parts = branch.split("/", 2)
|
202
|
-
if "/".join(branch_parts[:2]) == repo and branch in all_branches:
|
203
|
-
repo_branches.append(branch_parts[-1])
|
204
|
-
if not repo_branches:
|
205
|
-
repo_branches = [default_branches[repo]]
|
206
|
-
|
207
|
-
branches_to_pull[repo] = repo_branches
|
208
|
-
|
209
|
-
return default_branches, branches_to_pull
|
210
|
-
|
211
177
|
def user_friendly_error_message(self, message: str) -> str:
|
212
178
|
user_message = ""
|
213
179
|
if "404 Client Error: Not Found for url: https://api.github.com/repos/" in message:
|
@@ -229,7 +195,7 @@ class SourceGithub(AbstractSource):
|
|
229
195
|
config = self._validate_and_transform_config(config)
|
230
196
|
try:
|
231
197
|
authenticator = self._get_authenticator(config)
|
232
|
-
_, repositories = self._get_org_repositories(config=config, authenticator=authenticator)
|
198
|
+
_, repositories, _ = self._get_org_repositories(config=config, authenticator=authenticator)
|
233
199
|
if not repositories:
|
234
200
|
return (
|
235
201
|
False,
|
@@ -246,7 +212,7 @@ class SourceGithub(AbstractSource):
|
|
246
212
|
authenticator = self._get_authenticator(config)
|
247
213
|
config = self._validate_and_transform_config(config)
|
248
214
|
try:
|
249
|
-
organizations, repositories = self._get_org_repositories(config=config, authenticator=authenticator)
|
215
|
+
organizations, repositories, pattern = self._get_org_repositories(config=config, authenticator=authenticator)
|
250
216
|
except Exception as e:
|
251
217
|
message = repr(e)
|
252
218
|
user_message = self.user_friendly_error_message(message)
|
@@ -291,7 +257,6 @@ class SourceGithub(AbstractSource):
|
|
291
257
|
}
|
292
258
|
repository_args_with_start_date = {**repository_args, "start_date": start_date}
|
293
259
|
|
294
|
-
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", []), repository_args)
|
295
260
|
pull_requests_stream = PullRequests(**repository_args_with_start_date)
|
296
261
|
projects_stream = Projects(**repository_args_with_start_date)
|
297
262
|
project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
|
@@ -307,7 +272,7 @@ class SourceGithub(AbstractSource):
|
|
307
272
|
Comments(**repository_args_with_start_date),
|
308
273
|
CommitCommentReactions(**repository_args_with_start_date),
|
309
274
|
CommitComments(**repository_args_with_start_date),
|
310
|
-
Commits(**repository_args_with_start_date, branches_to_pull=
|
275
|
+
Commits(**repository_args_with_start_date, branches_to_pull=config.get("branches", [])),
|
311
276
|
ContributorActivity(**repository_args),
|
312
277
|
Deployments(**repository_args_with_start_date),
|
313
278
|
Events(**repository_args_with_start_date),
|
@@ -327,7 +292,7 @@ class SourceGithub(AbstractSource):
|
|
327
292
|
ProjectsV2(**repository_args_with_start_date),
|
328
293
|
pull_requests_stream,
|
329
294
|
Releases(**repository_args_with_start_date),
|
330
|
-
Repositories(**organization_args_with_start_date),
|
295
|
+
Repositories(**organization_args_with_start_date, pattern=pattern),
|
331
296
|
ReviewComments(**repository_args_with_start_date),
|
332
297
|
Reviews(**repository_args_with_start_date),
|
333
298
|
Stargazers(**repository_args_with_start_date),
|
source_github/spec.json
CHANGED
@@ -81,18 +81,19 @@
|
|
81
81
|
"type": "array",
|
82
82
|
"items": {
|
83
83
|
"type": "string",
|
84
|
-
"pattern": "^
|
84
|
+
"pattern": "^[\\w.-]+/(([\\w.-]*\\*)|[\\w.-]+(?<!\\.git))$"
|
85
85
|
},
|
86
86
|
"minItems": 1,
|
87
87
|
"examples": [
|
88
|
-
"airbytehq/airbyte
|
88
|
+
"airbytehq/airbyte",
|
89
|
+
"airbytehq/another-repo",
|
89
90
|
"airbytehq/*",
|
90
|
-
"airbytehq/
|
91
|
+
"airbytehq/a*"
|
91
92
|
],
|
92
93
|
"title": "GitHub Repositories",
|
93
|
-
"description": "List of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/*` for get all repositories from organization and `airbytehq/
|
94
|
+
"description": "List of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/*` for get all repositories from organization and `airbytehq/a* for matching multiple repositories by pattern.",
|
94
95
|
"order": 1,
|
95
|
-
"pattern_descriptor": "org/repo org/another-repo org/*"
|
96
|
+
"pattern_descriptor": "org/repo org/another-repo org/* org/a*"
|
96
97
|
},
|
97
98
|
"start_date": {
|
98
99
|
"type": "string",
|
@@ -126,7 +127,7 @@
|
|
126
127
|
"type": "string"
|
127
128
|
},
|
128
129
|
"title": "Branches",
|
129
|
-
"examples": ["airbytehq/airbyte/master airbytehq/airbyte/my-branch"],
|
130
|
+
"examples": ["airbytehq/airbyte/master", "airbytehq/airbyte/my-branch"],
|
130
131
|
"description": "List of GitHub repository branches to pull commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled.",
|
131
132
|
"order": 4,
|
132
133
|
"pattern_descriptor": "org/repo/branch1 org/repo/branch2"
|
source_github/streams.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
import re
|
5
6
|
import time
|
6
7
|
from abc import ABC, abstractmethod
|
7
8
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
|
@@ -41,8 +42,7 @@ class GithubStreamABC(HttpStream, ABC):
|
|
41
42
|
|
42
43
|
def __init__(self, api_url: str = "https://api.github.com", access_token_type: str = "", **kwargs):
|
43
44
|
if kwargs.get("authenticator"):
|
44
|
-
|
45
|
-
kwargs["authenticator"].max_time = 60 * 60 # Set to 60 minutes to bypass limitation of CDK
|
45
|
+
kwargs["authenticator"].max_time = self.max_time
|
46
46
|
super().__init__(**kwargs)
|
47
47
|
|
48
48
|
self.access_token_type = access_token_type
|
@@ -442,12 +442,18 @@ class Repositories(SemiIncrementalMixin, Organizations):
|
|
442
442
|
"direction": "desc",
|
443
443
|
}
|
444
444
|
|
445
|
+
def __init__(self, *args, pattern: Optional[str] = None, **kwargs):
|
446
|
+
self._pattern = re.compile(pattern) if pattern else pattern
|
447
|
+
super().__init__(*args, **kwargs)
|
448
|
+
|
445
449
|
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
446
450
|
return f"orgs/{stream_slice['organization']}/repos"
|
447
451
|
|
448
452
|
def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
|
449
453
|
for record in response.json(): # GitHub puts records in an array.
|
450
|
-
|
454
|
+
record = self.transform(record=record, stream_slice=stream_slice)
|
455
|
+
if not self._pattern or self._pattern.match(record["full_name"]):
|
456
|
+
yield record
|
451
457
|
|
452
458
|
|
453
459
|
class Tags(GithubStream):
|
@@ -677,10 +683,13 @@ class Commits(IncrementalMixin, GithubStream):
|
|
677
683
|
cursor_field = "created_at"
|
678
684
|
slice_keys = ["repository", "branch"]
|
679
685
|
|
680
|
-
def __init__(self, branches_to_pull:
|
686
|
+
def __init__(self, branches_to_pull: List[str], **kwargs):
|
681
687
|
super().__init__(**kwargs)
|
682
|
-
|
683
|
-
self.
|
688
|
+
kwargs.pop("start_date")
|
689
|
+
self.branches_to_repos = {}
|
690
|
+
self.branches_to_pull = set(branches_to_pull)
|
691
|
+
self.branches_stream = Branches(**kwargs)
|
692
|
+
self.repositories_stream = RepositoryStats(**kwargs)
|
684
693
|
|
685
694
|
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
|
686
695
|
params = super(IncrementalMixin, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
|
@@ -691,9 +700,10 @@ class Commits(IncrementalMixin, GithubStream):
|
|
691
700
|
return params
|
692
701
|
|
693
702
|
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
|
703
|
+
self._validate_branches_to_pull()
|
694
704
|
for stream_slice in super().stream_slices(**kwargs):
|
695
705
|
repository = stream_slice["repository"]
|
696
|
-
for branch in self.
|
706
|
+
for branch in self.branches_to_repos.get(repository, []):
|
697
707
|
yield {"branch": branch, "repository": repository}
|
698
708
|
|
699
709
|
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
|
@@ -719,6 +729,30 @@ class Commits(IncrementalMixin, GithubStream):
|
|
719
729
|
current_stream_state.setdefault(repository, {}).setdefault(branch, {})[self.cursor_field] = updated_state
|
720
730
|
return current_stream_state
|
721
731
|
|
732
|
+
def _validate_branches_to_pull(self):
|
733
|
+
# Get the default branch for each repository
|
734
|
+
default_branches = {}
|
735
|
+
for stream_slice in self.repositories_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
736
|
+
for repo_stats in self.repositories_stream.read_records(stream_slice=stream_slice, sync_mode=SyncMode.full_refresh):
|
737
|
+
default_branches[repo_stats["full_name"]] = repo_stats["default_branch"]
|
738
|
+
|
739
|
+
all_branches = []
|
740
|
+
for stream_slice in self.branches_stream.stream_slices(sync_mode=SyncMode.full_refresh):
|
741
|
+
for branch in self.branches_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
|
742
|
+
all_branches.append(f"{branch['repository']}/{branch['name']}")
|
743
|
+
|
744
|
+
# Create mapping of repository to list of branches to pull commits for
|
745
|
+
# If no branches are specified for a repo, use its default branch
|
746
|
+
for repo in self.repositories:
|
747
|
+
repo_branches = []
|
748
|
+
for branch in self.branches_to_pull:
|
749
|
+
branch_parts = branch.split("/", 2)
|
750
|
+
if "/".join(branch_parts[:2]) == repo and branch in all_branches:
|
751
|
+
repo_branches.append(branch_parts[-1])
|
752
|
+
if not repo_branches:
|
753
|
+
repo_branches = [default_branches[repo]]
|
754
|
+
self.branches_to_repos[repo] = repo_branches
|
755
|
+
|
722
756
|
|
723
757
|
class Issues(IncrementalMixin, GithubStream):
|
724
758
|
"""
|
File without changes
|
File without changes
|