airbyte-source-github 2.1.12__tar.gz → 2.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/PKG-INFO +1 -1
  2. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/pyproject.toml +1 -1
  3. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/graphql.py +48 -0
  4. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/releases.json +13 -24
  5. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/streams.py +129 -18
  6. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/README.md +0 -0
  7. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/__init__.py +0 -0
  8. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/backoff_strategies.py +0 -0
  9. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/config_migrations.py +0 -0
  10. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/constants.py +0 -0
  11. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/errors_handlers.py +0 -0
  12. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/github_schema.py +0 -0
  13. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/run.py +0 -0
  14. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/assignees.json +0 -0
  15. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/branches.json +0 -0
  16. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/collaborators.json +0 -0
  17. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/comments.json +0 -0
  18. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/commit_comment_reactions.json +0 -0
  19. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/commit_comments.json +0 -0
  20. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/commits.json +0 -0
  21. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/contributor_activity.json +0 -0
  22. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/deployments.json +0 -0
  23. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/events.json +0 -0
  24. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_comment_reactions.json +0 -0
  25. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_events.json +0 -0
  26. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_labels.json +0 -0
  27. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_milestones.json +0 -0
  28. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_reactions.json +0 -0
  29. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issue_timeline_events.json +0 -0
  30. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/issues.json +0 -0
  31. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/organizations.json +0 -0
  32. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/project_cards.json +0 -0
  33. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/project_columns.json +0 -0
  34. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/projects.json +0 -0
  35. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/projects_v2.json +0 -0
  36. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/pull_request_comment_reactions.json +0 -0
  37. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/pull_request_commits.json +0 -0
  38. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/pull_request_stats.json +0 -0
  39. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/pull_requests.json +0 -0
  40. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/repositories.json +0 -0
  41. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/review_comments.json +0 -0
  42. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/reviews.json +0 -0
  43. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/events/comment.json +0 -0
  44. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/events/commented.json +0 -0
  45. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/events/committed.json +0 -0
  46. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/events/cross_referenced.json +0 -0
  47. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/events/reviewed.json +0 -0
  48. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/reaction.json +0 -0
  49. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/reactions.json +0 -0
  50. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/user.json +0 -0
  51. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/shared/user_graphql.json +0 -0
  52. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/stargazers.json +0 -0
  53. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/tags.json +0 -0
  54. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/team_members.json +0 -0
  55. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/team_memberships.json +0 -0
  56. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/teams.json +0 -0
  57. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/users.json +0 -0
  58. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/workflow_jobs.json +0 -0
  59. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/workflow_runs.json +0 -0
  60. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/schemas/workflows.json +0 -0
  61. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/source.py +0 -0
  62. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/spec.json +0 -0
  63. {airbyte_source_github-2.1.12 → airbyte_source_github-2.1.13}/source_github/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-github
3
- Version: 2.1.12
3
+ Version: 2.1.13
4
4
  Summary: Source implementation for GitHub.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "2.1.12"
6
+ version = "2.1.13"
7
7
  name = "airbyte-source-github"
8
8
  description = "Source implementation for GitHub."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -165,6 +165,54 @@ def get_query_issue_reactions(owner, name, first, after, number=None):
165
165
  return str(op)
166
166
 
167
167
 
168
def get_query_releases(owner, name, first, after):
    """Build the GraphQL query that lists one page of a repository's releases.

    Releases are requested ordered by ``CREATED_AT`` ascending. Most fields are
    aliased to the names used by the REST releases payload (``node_id``,
    ``tag_name``, ``draft``, ``body`` ...), so the response can be reshaped into
    REST-compatible records by the caller.

    :param owner: login of the repository owner.
    :param name: repository name.
    :param first: page size, i.e. number of releases to request.
    :param after: pagination cursor from the previous page; falsy for the first page.
    :return: the query serialized to a string.
    """
    kwargs = {"first": first, "order_by": {"field": "CREATED_AT", "direction": "ASC"}}
    if after:
        kwargs["after"] = after

    op = sgqlc.operation.Operation(_schema_root.query_type)
    repository = op.repository(owner=owner, name=name)
    repository.name()
    repository.owner.login()

    releases = repository.releases(**kwargs)
    releases.nodes.__fields__(
        id="node_id",
        database_id="id",
        name=True,
        tag_name="tag_name",
        created_at="created_at",
        published_at="published_at",
        updated_at="updated_at",
        is_draft="draft",
        is_prerelease="prerelease",
        description="body",
        description_html="body_html",
        url="html_url",
    )
    releases.nodes.tag_commit.oid(__alias__="target_commitish")

    author = releases.nodes.author(__alias__="author").__as__(_schema_root.User)
    select_user_fields(author)

    # Only the first 100 assets of each release are requested; has_next_page is
    # selected so the caller can detect that a release was truncated.
    release_assets = releases.nodes.release_assets(first=100, __alias__="assets")
    release_assets.nodes.__fields__(
        id="node_id",
        name=True,
        content_type="content_type",
        size=True,
        download_count="download_count",
        created_at="created_at",
        updated_at="updated_at",
        download_url="browser_download_url",
        url=True,
    )
    release_assets.nodes.uploaded_by(__alias__="uploader").__as__(_schema_root.User).__fields__(database_id="id")
    release_assets.page_info.__fields__(has_next_page=True)

    # Create the aliased selection once and add both sub-selections to it,
    # instead of invoking the selector twice with the same alias.
    reaction_groups = releases.nodes.reaction_groups(__alias__="reaction_groups")
    reaction_groups.__fields__(content=True)
    reaction_groups.reactors.__fields__(total_count=True)

    # first=0: only the total count of mentions is needed, not the mentioned users.
    releases.nodes.mentions(first=0, __alias__="mentions_connection").total_count()
    releases.page_info.__fields__(has_next_page=True, end_cursor=True)
    return str(op)
214
+
215
+
168
216
  class QueryReactions:
169
217
  # AVERAGE_REVIEWS - optimal number of reviews to fetch inside every pull request.
170
218
  # If we try to fetch too many (up to 100) we will spend too many scores of query cost.
@@ -7,7 +7,7 @@
7
7
  "type": "string"
8
8
  },
9
9
  "url": {
10
- "description": "The URL for the release.",
10
+ "description": "The REST API URL for the release (synthesized from GraphQL data).",
11
11
  "type": ["null", "string"]
12
12
  },
13
13
  "html_url": {
@@ -15,19 +15,19 @@
15
15
  "type": ["null", "string"]
16
16
  },
17
17
  "assets_url": {
18
- "description": "The URL to fetch information about the assets linked to this release.",
18
+ "description": "The URL to fetch information about the assets linked to this release (synthesized from GraphQL data).",
19
19
  "type": ["null", "string"]
20
20
  },
21
21
  "upload_url": {
22
- "description": "The URL for uploading assets to the release.",
22
+ "description": "The URL for uploading assets to the release (synthesized from GraphQL data).",
23
23
  "type": ["null", "string"]
24
24
  },
25
25
  "tarball_url": {
26
- "description": "The URL for the tarball file of the release.",
26
+ "description": "The URL for the tarball file of the release (synthesized from GraphQL data).",
27
27
  "type": ["null", "string"]
28
28
  },
29
29
  "zipball_url": {
30
- "description": "The URL for the zipball file of the release.",
30
+ "description": "The URL for the zipball file of the release (synthesized from GraphQL data).",
31
31
  "type": ["null", "string"]
32
32
  },
33
33
  "id": {
@@ -43,7 +43,7 @@
43
43
  "type": ["null", "string"]
44
44
  },
45
45
  "target_commitish": {
46
- "description": "The commit SHA or branch name for the release.",
46
+ "description": "The commit SHA the release is based on. Note: the GraphQL API returns the commit SHA here, not the branch name returned by the REST API.",
47
47
  "type": ["null", "string"]
48
48
  },
49
49
  "name": {
@@ -72,12 +72,17 @@
72
72
  "type": ["null", "string"],
73
73
  "format": "date-time"
74
74
  },
75
+ "updated_at": {
76
+ "description": "The timestamp of when the release was last updated.",
77
+ "type": ["null", "string"],
78
+ "format": "date-time"
79
+ },
75
80
  "author": {
76
81
  "description": "The author of the release.",
77
82
  "$ref": "user.json"
78
83
  },
79
84
  "assets": {
80
- "description": "List of assets (e.g., downloadable files) associated with the release",
85
+ "description": "List of assets (e.g., downloadable files) associated with the release. Capped at 100 per release.",
81
86
  "type": ["null", "array"],
82
87
  "items": {
83
88
  "description": "Details of an individual asset",
@@ -92,7 +97,7 @@
92
97
  "type": ["null", "string"]
93
98
  },
94
99
  "id": {
95
- "description": "The unique identifier for the asset.",
100
+ "description": "The unique numeric identifier of the asset (extracted from the GraphQL Node ID).",
96
101
  "type": ["null", "integer"]
97
102
  },
98
103
  "node_id": {
@@ -103,14 +108,6 @@
103
108
  "description": "The name of the asset.",
104
109
  "type": ["null", "string"]
105
110
  },
106
- "label": {
107
- "description": "The label assigned to the asset.",
108
- "type": ["null", "string"]
109
- },
110
- "state": {
111
- "description": "The state of the asset.",
112
- "type": ["null", "string"]
113
- },
114
111
  "content_type": {
115
112
  "description": "The content type of the asset.",
116
113
  "type": ["null", "string"]
@@ -144,18 +141,10 @@
144
141
  "description": "The HTML body of the release.",
145
142
  "type": ["null", "string"]
146
143
  },
147
- "body_text": {
148
- "description": "The text body of the release.",
149
- "type": ["null", "string"]
150
- },
151
144
  "mentions_count": {
152
145
  "description": "The count of mentions in the release.",
153
146
  "type": ["null", "integer"]
154
147
  },
155
- "discussion_url": {
156
- "description": "The URL for the discussion related to the release.",
157
- "type": ["null", "string"]
158
- },
159
148
  "reactions": {
160
149
  "description": "The reactions associated with the release.",
161
150
  "$ref": "reactions.json"
@@ -2,7 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import base64
5
6
  import re
7
+ import struct
6
8
  from abc import ABC, abstractmethod
7
9
  from datetime import timedelta, timezone
8
10
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
@@ -39,6 +41,7 @@ from .graphql import (
39
41
  get_query_issue_reactions,
40
42
  get_query_projectsV2,
41
43
  get_query_pull_requests,
44
+ get_query_releases,
42
45
  get_query_reviews,
43
46
  )
44
47
  from .utils import GitHubAPILimitException, getter
@@ -502,24 +505,6 @@ class Users(Organizations):
502
505
  # Below are semi incremental streams
503
506
 
504
507
 
505
- class Releases(SemiIncrementalMixin, GithubStream):
506
- """
507
- API docs: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases
508
- """
509
-
510
- cursor_field = "created_at"
511
-
512
- def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
513
- record = super().transform(record=record, stream_slice=stream_slice)
514
-
515
- assets = record.get("assets", [])
516
- for asset in assets:
517
- uploader = asset.pop("uploader", None)
518
- asset["uploader_id"] = uploader.get("id") if uploader else None
519
-
520
- return record
521
-
522
-
523
508
  class Events(SemiIncrementalMixin, GithubStream):
524
509
  """
525
510
  API docs: https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events
@@ -808,6 +793,132 @@ class GitHubGraphQLStream(GithubStream, ABC):
808
793
  return {}
809
794
 
810
795
 
796
class Releases(SemiIncrementalMixin, GitHubGraphQLStream):
    """
    API docs: https://docs.github.com/en/graphql/reference/objects#release
    Uses GraphQL API to avoid the REST API's 10,000 result pagination limit.

    Every GraphQL release node is reshaped in ``parse_response`` so that the
    emitted record keeps the field names of the previous REST-based stream
    (``assets``, ``reactions``, ``mentions_count``, ``target_commitish`` and the
    synthesized ``*_url`` fields).
    """

    cursor_field = "created_at"
    is_sorted = "asc"

    # GraphQL reaction ``content`` value -> key used in the REST-style "reactions" object.
    GRAPHQL_REACTION_TO_REST = {
        "THUMBS_UP": "plus_one",
        "THUMBS_DOWN": "minus_one",
        "LAUGH": "laugh",
        "HOORAY": "hooray",
        "CONFUSED": "confused",
        "HEART": "heart",
        "ROCKET": "rocket",
        "EYES": "eyes",
    }

    @staticmethod
    def _decode_msgpack_uint_array(payload: bytes) -> Optional[List[int]]:
        """Decode a msgpack fixarray whose elements are all unsigned integers.

        Only the subset of msgpack needed for node IDs is understood: a fixarray
        header followed by positive fixints / uint8 / uint16 / uint32 / uint64.

        :param payload: raw bytes obtained from the base64 part of a node ID.
        :return: the decoded integers, or None when the payload has any other shape.
        """
        uint_widths = {0xCC: 1, 0xCD: 2, 0xCE: 4, 0xCF: 8}
        if not payload or payload[0] & 0xF0 != 0x90:
            return None

        values: List[int] = []
        position = 1
        for _ in range(payload[0] & 0x0F):
            if position >= len(payload):
                return None
            marker = payload[position]
            position += 1
            if marker <= 0x7F:
                # positive fixint: the marker byte is the value itself
                values.append(marker)
                continue
            width = uint_widths.get(marker)
            if width is None or position + width > len(payload):
                return None
            values.append(int.from_bytes(payload[position : position + width], "big"))
            position += width

        # Unconsumed bytes mean the payload is not the simple array we expect.
        return values if position == len(payload) else None

    @staticmethod
    def _extract_database_id_from_node_id(node_id: str) -> Optional[int]:
        """Extract the numeric database ID from a GitHub GraphQL Node ID.

        GitHub Node IDs with type prefixes (e.g. 'RA_...') are URL-safe base64
        encodings of a msgpack array: [type_flag, repo_database_id, entity_database_id].
        msgpack stores integers in the smallest encoding that fits, so the entity ID
        is not always a 4-byte value; the array is therefore decoded element by
        element and its last element is returned.

        :param node_id: the GraphQL node ID, e.g. ``"RA_kwDO..."``.
        :return: the database ID, or None when the node ID has no type prefix or
            cannot be decoded.
        """
        if not isinstance(node_id, str) or "_" not in node_id:
            return None
        encoded = node_id.split("_", 1)[1]
        try:
            # Node IDs are emitted without base64 padding; surplus "=" is ignored.
            decoded = base64.urlsafe_b64decode(encoded + "==")
        except ValueError:  # includes binascii.Error
            return None

        values = Releases._decode_msgpack_uint_array(decoded)
        if values:
            return values[-1]
        # Payload is not the expected msgpack array: keep the previous best-effort
        # behaviour of reading the trailing 4 bytes as a big-endian uint32.
        if len(decoded) >= 4:
            return struct.unpack(">I", decoded[-4:])[0]
        return None

    def _get_assets_from_release(self, record: Mapping) -> list:
        """Flatten the ``assets`` connection of a release into a list of REST-like assets.

        Each asset gets ``uploader_id`` (taken from the nested ``uploader`` object,
        which is removed) and a numeric ``id`` derived from its ``node_id``. A warning
        is logged when the connection reports more pages, because only the first page
        of assets is present in the record.

        :param record: the raw release node from the GraphQL response.
        :return: list of asset mappings (empty when the release has no assets).
        """
        assets_data = record.get("assets") or {}
        if (assets_data.get("pageInfo") or {}).get("hasNextPage"):
            self.logger.warning(
                "Release %s in %s has >100 assets; only the first 100 were synced. "
                "Sub-pagination for release assets is not yet implemented.",
                record.get("id"),
                record.get("repository"),
            )
        # Connection nodes are nullable, so drop null entries before mutating.
        assets = [asset for asset in (assets_data.get("nodes") or []) if asset is not None]
        for asset in assets:
            uploader = asset.pop("uploader", None)
            asset["uploader_id"] = uploader.get("id") if uploader else None
            asset["id"] = self._extract_database_id_from_node_id(asset.get("node_id"))
        return assets

    def _get_reactions_from_release(self, record: Mapping) -> Optional[Mapping]:
        """Convert GraphQL ``reaction_groups`` into the REST-style reactions rollup.

        ``reaction_groups`` is removed from ``record``. Every REST reaction key is
        present in the result (0 when nobody reacted) together with ``total_count``.

        :param record: the raw release node; mutated in place.
        :return: the reactions mapping, or None when the release has no ``reaction_groups``.
        """
        reaction_groups = record.pop("reaction_groups", None)
        if reaction_groups is None:
            return None
        reactions = {rest_key: 0 for rest_key in self.GRAPHQL_REACTION_TO_REST.values()}
        total = 0
        for group in reaction_groups:
            if not group:
                continue
            rest_key = self.GRAPHQL_REACTION_TO_REST.get(group.get("content"))
            if rest_key:
                count = (group.get("reactors") or {}).get("totalCount") or 0
                reactions[rest_key] = count
                total += count
        reactions["total_count"] = total
        return reactions

    def _build_rest_urls(self, repository: str, release_id: Optional[int], tag_name: Optional[str]) -> Mapping[str, Optional[str]]:
        """Synthesize REST-compatible URL fields from GraphQL data.

        The GraphQL API does not return reference URLs the way the REST API does,
        but we can construct them from the repository, release ID, and tag name
        to retain backwards compatibility with the previous REST-based schema.

        :param repository: full repository name, ``owner/name``.
        :param release_id: numeric database ID of the release; when None the
            ID-based URLs are emitted as None instead of containing the text "None".
        :param tag_name: tag of the release; when empty the tarball/zipball URLs are None.
        :return: mapping with ``url``, ``assets_url``, ``upload_url``, ``tarball_url``
            and ``zipball_url``.
        """
        api_url = self.api_url.rstrip("/")
        # NOTE(review): this substitution only matches github.com; on other hosts
        # upload_url falls back to the plain API base URL - confirm the intended value.
        upload_url = api_url.replace("api.github.com", "uploads.github.com")

        has_id = release_id is not None
        release_path = f"repos/{repository}/releases/{release_id}"
        return {
            "url": f"{api_url}/{release_path}" if has_id else None,
            "assets_url": f"{api_url}/{release_path}/assets" if has_id else None,
            "upload_url": f"{upload_url}/{release_path}/assets{{?name,label}}" if has_id else None,
            "tarball_url": f"{api_url}/repos/{repository}/tarball/{tag_name}" if tag_name else None,
            "zipball_url": f"{api_url}/repos/{repository}/zipball/{tag_name}" if tag_name else None,
        }

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one REST-shaped record per release node found in the response.

        Nothing is yielded when the response carries no ``repository`` object.
        """
        repository = (response.json().get("data") or {}).get("repository")
        if not repository:
            return
        nodes = (repository.get("releases") or {}).get("nodes") or []
        for record in nodes:
            if record is None:
                continue
            record["repository"] = self._get_repository_name(repository)
            if record.get("author"):
                record["author"]["type"] = record["author"].pop("__typename", "User")
            record["assets"] = self._get_assets_from_release(record)
            record["reactions"] = self._get_reactions_from_release(record)
            mentions_connection = record.pop("mentions_connection", None)
            if mentions_connection is not None:
                record["mentions_count"] = mentions_connection.get("totalCount", 0)
            # The commit SHA is nested under "tagCommit"; lift it to the top level.
            tag_commit = record.pop("tagCommit", None)
            record["target_commitish"] = tag_commit.get("target_commitish") if tag_commit else None
            record.update(self._build_rest_urls(record["repository"], record.get("id"), record.get("tag_name")))
            yield record

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return ``{"after": <endCursor>}`` while the releases connection has more pages, else None."""
        repository = (response.json().get("data") or {}).get("repository")
        if repository:
            page_info = (repository.get("releases") or {}).get("pageInfo") or {}
            if page_info.get("hasNextPage"):
                return {"after": page_info["endCursor"]}
        return None

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        """Build the request body holding the releases query for the sliced repository.

        ``stream_slice["repository"]`` is expected in ``owner/name`` form; the cursor
        from ``next_page_token`` (when present) selects the page to fetch.
        """
        organization, name = stream_slice["repository"].split("/")
        after = next_page_token["after"] if next_page_token else None
        query = get_query_releases(owner=organization, name=name, first=self.page_size, after=after)
        return {"query": query}
920
+
921
+
811
922
  class PullRequestStats(SemiIncrementalMixin, GitHubGraphQLStream):
812
923
  """
813
924
  API docs: https://docs.github.com/en/graphql/reference/objects#pullrequest