airbyte-source-github 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
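A comparison like this can be reproduced locally, since wheels are plain zip archives published to PyPI. Below is a minimal sketch, assuming network access and a reasonably recent pip; the fetch_wheel helper and the wheels/ paths are illustrative, not part of any published tool:

# Minimal sketch (illustrative): download the two released wheels so their
# contents can be compared locally. Wheels are ordinary zip archives.
import subprocess
import sys
from pathlib import Path

def fetch_wheel(spec: str, dest: Path) -> Path:
    """Download a single wheel (no dependencies, binaries only) into dest."""
    dest.mkdir(parents=True, exist_ok=True)
    subprocess.run(
        [sys.executable, "-m", "pip", "download", "--no-deps",
         "--only-binary=:all:", spec, "-d", str(dest)],
        check=True,
    )
    return next(dest.glob("*.whl"))  # assumes dest holds exactly one wheel

old_whl = fetch_wheel("airbyte-source-github==1.6.0", Path("wheels/old"))
new_whl = fetch_wheel("airbyte-source-github==1.6.1", Path("wheels/new"))

With both files on disk, any diff tool works; a file-level summary like the list below can be derived from the archives' member lists (see the sketch after the list).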
Files changed (32)
  1. airbyte_source_github-1.6.1.dist-info/METADATA +111 -0
  2. {airbyte_source_github-1.6.0.dist-info → airbyte_source_github-1.6.1.dist-info}/RECORD +17 -43
  3. {airbyte_source_github-1.6.0.dist-info → airbyte_source_github-1.6.1.dist-info}/WHEEL +1 -2
  4. airbyte_source_github-1.6.1.dist-info/entry_points.txt +3 -0
  5. airbyte_source_github-1.6.0.dist-info/METADATA +0 -144
  6. airbyte_source_github-1.6.0.dist-info/entry_points.txt +0 -2
  7. airbyte_source_github-1.6.0.dist-info/top_level.txt +0 -3
  8. integration_tests/__init__.py +0 -0
  9. integration_tests/abnormal_state.json +0 -237
  10. integration_tests/acceptance.py +0 -16
  11. integration_tests/configured_catalog.json +0 -435
  12. integration_tests/configured_catalog_full_refresh_test.json +0 -415
  13. integration_tests/invalid_config.json +0 -5
  14. integration_tests/sample_config.json +0 -5
  15. integration_tests/sample_state.json +0 -137
  16. unit_tests/__init__.py +0 -3
  17. unit_tests/conftest.py +0 -29
  18. unit_tests/projects_v2_pull_requests_query.json +0 -3
  19. unit_tests/pull_request_stats_query.json +0 -3
  20. unit_tests/responses/contributor_activity_response.json +0 -33
  21. unit_tests/responses/graphql_reviews_responses.json +0 -405
  22. unit_tests/responses/issue_timeline_events.json +0 -166
  23. unit_tests/responses/issue_timeline_events_response.json +0 -170
  24. unit_tests/responses/projects_v2_response.json +0 -45
  25. unit_tests/responses/pull_request_comment_reactions.json +0 -744
  26. unit_tests/responses/pull_request_stats_response.json +0 -317
  27. unit_tests/test_migrations/test_config.json +0 -8
  28. unit_tests/test_migrations/test_new_config.json +0 -8
  29. unit_tests/test_multiple_token_authenticator.py +0 -163
  30. unit_tests/test_source.py +0 -331
  31. unit_tests/test_stream.py +0 -1471
  32. unit_tests/utils.py +0 -78
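The list above is essentially a set comparison of the two archives' member names; deletions dominate because the 1.6.1 wheel stops bundling the integration_tests/ and unit_tests/ packages. A sketch of that comparison, with illustrative local filenames:

# Sketch: derive added/removed files by comparing wheel member names.
import zipfile

def member_names(wheel_path: str) -> set[str]:
    # Wheels are zip archives; namelist() returns every packaged file.
    with zipfile.ZipFile(wheel_path) as z:
        return set(z.namelist())

old_names = member_names("airbyte_source_github-1.6.0-py3-none-any.whl")
new_names = member_names("airbyte_source_github-1.6.1-py3-none-any.whl")
for name in sorted(old_names - new_names):
    print("deleted:", name)  # the unit_tests/ and integration_tests/ files appear here
for name in sorted(new_names - old_names):
    print("added:  ", name)  # e.g. the 1.6.1 dist-info entries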
unit_tests/test_stream.py DELETED
@@ -1,1471 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
-
- import json
- from http import HTTPStatus
- from pathlib import Path
- from unittest.mock import MagicMock, patch
-
- import pytest
- import requests
- import responses
- from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode
- from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException, UserDefinedBackoffException
- from requests import HTTPError
- from responses import matchers
- from source_github import SourceGithub, constants
- from source_github.streams import (
-     Branches,
-     Collaborators,
-     Comments,
-     CommitCommentReactions,
-     CommitComments,
-     Commits,
-     ContributorActivity,
-     Deployments,
-     IssueEvents,
-     IssueLabels,
-     IssueMilestones,
-     IssueTimelineEvents,
-     Organizations,
-     ProjectCards,
-     ProjectColumns,
-     Projects,
-     ProjectsV2,
-     PullRequestCommentReactions,
-     PullRequestCommits,
-     PullRequests,
-     PullRequestStats,
-     Releases,
-     Repositories,
-     RepositoryStats,
-     Reviews,
-     Stargazers,
-     Tags,
-     TeamMembers,
-     TeamMemberships,
-     Teams,
-     Users,
-     WorkflowJobs,
-     WorkflowRuns,
- )
- from source_github.utils import read_full_refresh
-
- from .utils import ProjectsResponsesAPI, read_incremental
-
- DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80]
-
-
- @responses.activate
- @patch("time.sleep")
- def test_internal_server_error_retry(time_mock):
-     args = {"authenticator": None, "repositories": ["airbytehq/airbyte"], "start_date": "start_date", "page_size_for_large_streams": 30}
-     stream = CommitCommentReactions(**args)
-     stream_slice = {"repository": "airbytehq/airbyte", "comment_id": "id"}
-
-     time_mock.reset_mock()
-     responses.add("GET", "https://api.github.com/repos/airbytehq/airbyte/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR)
-     with pytest.raises(BaseBackoffException):
-         list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice))
-
-     sleep_delays = [delay[0][0] for delay in time_mock.call_args_list]
-     assert sleep_delays == DEFAULT_BACKOFF_DELAYS
-
-
- @pytest.mark.parametrize(
-     ("http_status", "response_headers", "expected_backoff_time"),
-     [
-         (HTTPStatus.BAD_GATEWAY, {}, None),
-         (HTTPStatus.INTERNAL_SERVER_ERROR, {}, None),
-         (HTTPStatus.SERVICE_UNAVAILABLE, {}, None),
-         (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, 60),
-         (HTTPStatus.FORBIDDEN, {"Retry-After": "30"}, 60),
-         (HTTPStatus.FORBIDDEN, {"Retry-After": "120"}, 120),
-         (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804454"}, 60.0),
-         (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804724"}, 300.0),
-     ],
- )
- @patch("time.time", return_value=1655804424.0)
- def test_backoff_time(time_mock, http_status, response_headers, expected_backoff_time):
-     response_mock = MagicMock()
-     response_mock.status_code = http_status
-     response_mock.headers = response_headers
-     args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30}
-     stream = PullRequestCommentReactions(**args)
-     assert stream.backoff_time(response_mock) == expected_backoff_time
-
-
- @pytest.mark.parametrize(
-     ("http_status", "response_headers", "text"),
-     [
-         (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'),
-         (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""),
-         (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""),
-         (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""),
-         (HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""),
-         (HTTPStatus.BAD_GATEWAY, {}, ""),
-         (HTTPStatus.SERVICE_UNAVAILABLE, {}, ""),
-     ],
- )
- def test_should_retry(http_status, response_headers, text):
-     stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30)
-     response_mock = MagicMock()
-     response_mock.status_code = http_status
-     response_mock.headers = response_headers
-     response_mock.text = text
-     response_mock.json = lambda: json.loads(text)
-     assert stream.should_retry(response_mock)
-
-
- @responses.activate
- @patch("time.sleep")
- def test_retry_after(time_mock):
-     first_request = True
-
-     def request_callback(request):
-         nonlocal first_request
-         if first_request:
-             first_request = False
-             return (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "")
-         return (HTTPStatus.OK, {}, '{"login": "airbytehq"}')
-
-     responses.add_callback(
-         responses.GET,
-         "https://api.github.com/orgs/airbytehq",
-         callback=request_callback,
-         content_type="application/json",
-     )
-
-     stream = Organizations(organizations=["airbytehq"])
-     list(read_full_refresh(stream))
-     assert len(responses.calls) == 2
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/airbytehq?per_page=100"
-     assert responses.calls[1].request.url == "https://api.github.com/orgs/airbytehq?per_page=100"
-
-
- @responses.activate
- @patch("time.sleep")
- @patch("time.time", return_value=1655804424.0)
- def test_graphql_rate_limited(time_mock, sleep_mock):
-     response_objects = [
-         (
-             HTTPStatus.OK,
-             {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655804724"},
-             json.dumps({"errors": [{"type": "RATE_LIMITED"}]}),
-         ),
-         (
-             HTTPStatus.OK,
-             {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655808324"},
-             json.dumps({"data": {"repository": None}}),
-         ),
-     ]
-
-     responses.add_callback(
-         responses.POST,
-         "https://api.github.com/graphql",
-         callback=lambda r: response_objects.pop(0),
-         content_type="application/json",
-     )
-
-     stream = PullRequestStats(repositories=["airbytehq/airbyte"], page_size_for_large_streams=30)
-     records = list(read_full_refresh(stream))
-     assert records == []
-     assert len(responses.calls) == 2
-     assert responses.calls[0].request.url == "https://api.github.com/graphql"
-     assert responses.calls[1].request.url == "https://api.github.com/graphql"
-     assert sum([c[0][0] for c in sleep_mock.call_args_list]) > 300
-
-
- @responses.activate
- def test_stream_teams_404():
-     organization_args = {"organizations": ["org_name"]}
-     stream = Teams(**organization_args)
-
-     responses.add(
-         "GET",
-         "https://api.github.com/orgs/org_name/teams",
-         status=requests.codes.NOT_FOUND,
-         json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/teams#list-teams"},
-     )
-
-     assert list(read_full_refresh(stream)) == []
-     assert len(responses.calls) == 1
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/teams?per_page=100"
-
-
- @responses.activate
- @patch("time.sleep")
- def test_stream_teams_502(sleep_mock):
-     organization_args = {"organizations": ["org_name"]}
-     stream = Teams(**organization_args)
-
-     url = "https://api.github.com/orgs/org_name/teams"
-     responses.add(
-         method="GET",
-         url=url,
-         status=requests.codes.BAD_GATEWAY,
-         json={"message": "Server Error"},
-     )
-
-     assert list(read_full_refresh(stream)) == []
-     assert len(responses.calls) == 6
-     # Check whether url is the same for all response.calls
-     assert set(call.request.url for call in responses.calls).symmetric_difference({f"{url}?per_page=100"}) == set()
-
-
- def test_stream_organizations_availability_report():
-     organization_args = {"organizations": ["org1", "org2"]}
-     stream = Organizations(**organization_args)
-     assert stream.availability_strategy is None
-
-
- @responses.activate
- def test_stream_organizations_read():
-     organization_args = {"organizations": ["org1", "org2"]}
-     stream = Organizations(**organization_args)
-     responses.add("GET", "https://api.github.com/orgs/org1", json={"id": 1})
-     responses.add("GET", "https://api.github.com/orgs/org2", json={"id": 2})
-     records = list(read_full_refresh(stream))
-     assert records == [{"id": 1}, {"id": 2}]
-
-
- @responses.activate
- def test_stream_teams_read():
-     organization_args = {"organizations": ["org1", "org2"]}
-     stream = Teams(**organization_args)
-     stream._session.cache.clear()
-     responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"id": 1}, {"id": 2}])
-     responses.add("GET", "https://api.github.com/orgs/org2/teams", json=[{"id": 3}])
-     records = list(read_full_refresh(stream))
-     assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
-     assert len(responses.calls) == 2
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/teams?per_page=100"
-     assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/teams?per_page=100"
-
-
- @responses.activate
- def test_stream_users_read():
-     organization_args = {"organizations": ["org1", "org2"]}
-     stream = Users(**organization_args)
-     responses.add("GET", "https://api.github.com/orgs/org1/members", json=[{"id": 1}, {"id": 2}])
-     responses.add("GET", "https://api.github.com/orgs/org2/members", json=[{"id": 3}])
-     records = list(read_full_refresh(stream))
-     assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
-     assert len(responses.calls) == 2
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/members?per_page=100"
-     assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/members?per_page=100"
-
-
- @responses.activate
- def test_stream_repositories_404():
-     organization_args = {"organizations": ["org_name"]}
-     stream = Repositories(**organization_args)
-
-     responses.add(
-         "GET",
-         "https://api.github.com/orgs/org_name/repos",
-         status=requests.codes.NOT_FOUND,
-         json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/repos#list-organization-repositories"},
-     )
-
-     assert list(read_full_refresh(stream)) == []
-     assert len(responses.calls) == 1
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
-
-
- @responses.activate
- def test_stream_repositories_401(caplog):
-     organization_args = {"organizations": ["org_name"], "access_token_type": constants.PERSONAL_ACCESS_TOKEN_TITLE}
-     stream = Repositories(**organization_args)
-
-     responses.add(
-         "GET",
-         "https://api.github.com/orgs/org_name/repos",
-         status=requests.codes.UNAUTHORIZED,
-         json={"message": "Bad credentials", "documentation_url": "https://docs.github.com/rest"},
-     )
-
-     with pytest.raises(HTTPError):
-         assert list(read_full_refresh(stream)) == []
-
-     assert len(responses.calls) == 1
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
-     assert "Personal Access Token renewal is required: Bad credentials" in caplog.messages
-
-
- @responses.activate
- def test_stream_repositories_read():
-     organization_args = {"organizations": ["org1", "org2"]}
-     stream = Repositories(**organization_args)
-     updated_at = "2020-01-01T00:00:00Z"
-     responses.add(
-         "GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1, "updated_at": updated_at}, {"id": 2, "updated_at": updated_at}]
-     )
-     responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3, "updated_at": updated_at}])
-     records = list(read_full_refresh(stream))
-     assert records == [
-         {"id": 1, "organization": "org1", "updated_at": updated_at},
-         {"id": 2, "organization": "org1", "updated_at": updated_at},
-         {"id": 3, "organization": "org2", "updated_at": updated_at},
-     ]
-     assert len(responses.calls) == 2
-     assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100&sort=updated&direction=desc"
-     assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100&sort=updated&direction=desc"
-
-
- @responses.activate
- def test_stream_projects_disabled():
-
-     repository_args_with_start_date = {"start_date": "start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]}
-
-     stream = Projects(**repository_args_with_start_date)
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/test_repo/projects",
-         status=requests.codes.GONE,
-         json={"message": "Projects are disabled for this repository", "documentation_url": "https://docs.github.com/v3/projects"},
-     )
-
-     assert list(read_full_refresh(stream)) == []
-     assert len(responses.calls) == 1
-     assert responses.calls[0].request.url == "https://api.github.com/repos/test_repo/projects?per_page=100&state=all"
-
-
- @responses.activate
- def test_stream_pull_requests_incremental_read():
-
-     page_size = 2
-     repository_args_with_start_date = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": page_size,
-         "start_date": "2022-02-02T10:10:03Z",
-     }
-
-     stream = PullRequests(**repository_args_with_start_date)
-
-     data = [
-         {"id": 1, "updated_at": "2022-02-02T10:10:02Z"},
-         {"id": 2, "updated_at": "2022-02-02T10:10:04Z"},
-         {"id": 3, "updated_at": "2022-02-02T10:10:06Z"},
-         {"id": 4, "updated_at": "2022-02-02T10:10:08Z"},
-         {"id": 5, "updated_at": "2022-02-02T10:10:10Z"},
-         {"id": 6, "updated_at": "2022-02-02T10:10:12Z"},
-     ]
-
-     api_url = "https://api.github.com/repos/organization/repository/pulls"
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[0:2],
-         headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=2>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc"}, strict_match=False)],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[2:4],
-         match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc", "page": "2"}, strict_match=False)],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[5:3:-1],
-         headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=2>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc"}, strict_match=False)],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[3:1:-1],
-         headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=3>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc", "page": "2"}, strict_match=False)],
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-     assert [r["id"] for r in records] == [2, 3, 4]
-     assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:08Z"}}
-
-     records = read_incremental(stream, stream_state)
-     assert [r["id"] for r in records] == [6, 5]
-     assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:12Z"}}
-
-
- @responses.activate
- def test_stream_commits_incremental_read():
-
-     repository_args_with_start_date = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": 100,
-         "start_date": "2022-02-02T10:10:03Z",
-     }
-
-     default_branches = {"organization/repository": "master"}
-     branches_to_pull = {"organization/repository": ["branch"]}
-
-     stream = Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches)
-     stream.page_size = 2
-
-     data = [
-         {"sha": 1, "commit": {"author": {"date": "2022-02-02T10:10:02Z"}}},
-         {"sha": 2, "commit": {"author": {"date": "2022-02-02T10:10:04Z"}}},
-         {"sha": 3, "commit": {"author": {"date": "2022-02-02T10:10:06Z"}}},
-         {"sha": 4, "commit": {"author": {"date": "2022-02-02T10:10:08Z"}}},
-         {"sha": 5, "commit": {"author": {"date": "2022-02-02T10:10:10Z"}}},
-         {"sha": 6, "commit": {"author": {"date": "2022-02-02T10:10:12Z"}}},
-         {"sha": 7, "commit": {"author": {"date": "2022-02-02T10:10:14Z"}}},
-     ]
-
-     api_url = "https://api.github.com/repos/organization/repository/commits"
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[0:3],
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:03Z", "sha": "branch", "per_page": "2"}, strict_match=False)],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[3:5],
-         headers={"Link": '<https://api.github.com/repos/organization/repository/commits?page=2>; rel="next"'},
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2"}, strict_match=False)],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[5:7],
-         match=[
-             matchers.query_param_matcher(
-                 {"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2", "page": "2"}, strict_match=False
-             )
-         ],
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-     assert [r["sha"] for r in records] == [2, 3]
-     assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:06Z"}}}
-     records = read_incremental(stream, stream_state)
-     assert [r["sha"] for r in records] == [4, 5, 6, 7]
-     assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:14Z"}}}
-
-
- @responses.activate
- def test_stream_pull_request_commits():
-
-     repository_args = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": 100,
-     }
-     repository_args_with_start_date = {**repository_args, "start_date": "2022-02-02T10:10:02Z"}
-
-     stream = PullRequestCommits(PullRequests(**repository_args_with_start_date), **repository_args)
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/organization/repository/pulls",
-         json=[
-             {"id": 1, "updated_at": "2022-02-02T10:10:02Z", "number": 1},
-             {"id": 2, "updated_at": "2022-02-02T10:10:04Z", "number": 2},
-             {"id": 3, "updated_at": "2022-02-02T10:10:06Z", "number": 3},
-         ],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/organization/repository/pulls/2/commits",
-         json=[{"sha": 1}, {"sha": 2}],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/organization/repository/pulls/3/commits",
-         json=[{"sha": 3}, {"sha": 4}],
-     )
-
-     records = list(read_full_refresh(stream))
-     assert records == [
-         {"sha": 1, "repository": "organization/repository", "pull_number": 2},
-         {"sha": 2, "repository": "organization/repository", "pull_number": 2},
-         {"sha": 3, "repository": "organization/repository", "pull_number": 3},
-         {"sha": 4, "repository": "organization/repository", "pull_number": 3},
-     ]
-
-
- @responses.activate
- def test_stream_project_columns():
-
-     repository_args_with_start_date = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": 100,
-         "start_date": "2022-02-01T00:00:00Z",
-     }
-
-     data = [
-         {
-             "updated_at": "2022-01-01T10:00:00Z",
-         },
-         {
-             "updated_at": "2022-03-01T10:00:00Z",
-             "columns": [
-                 {"updated_at": "2022-01-01T10:00:00Z"},
-                 {"updated_at": "2022-03-01T09:00:00Z"},
-                 {"updated_at": "2022-03-01T10:00:00Z"},
-             ],
-         },
-         {
-             "updated_at": "2022-05-01T10:00:00Z",
-             "columns": [
-                 {"updated_at": "2022-01-01T10:00:00Z"},
-                 {"updated_at": "2022-05-01T10:00:00Z"},
-             ],
-         },
-     ]
-
-     ProjectsResponsesAPI.register(data)
-
-     projects_stream = Projects(**repository_args_with_start_date)
-     stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
-     projects_stream._session.cache.clear()
-     stream._session.cache.clear()
-     stream_state = {}
-
-     records = read_incremental(stream, stream_state=stream_state)
-
-     assert records == [
-         {"id": 22, "name": "column_22", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T09:00:00Z"},
-         {"id": 23, "name": "column_23", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T10:00:00Z"},
-         {"id": 32, "name": "column_32", "project_id": 3, "repository": "organization/repository", "updated_at": "2022-05-01T10:00:00Z"},
-     ]
-
-     assert stream_state == {
-         "organization/repository": {"2": {"updated_at": "2022-03-01T10:00:00Z"}, "3": {"updated_at": "2022-05-01T10:00:00Z"}}
-     }
-
-     data = [
-         {"updated_at": "2022-01-01T10:00:00Z"},
-         {
-             "updated_at": "2022-04-01T10:00:00Z",
-             "columns": [
-                 {"updated_at": "2022-01-01T10:00:00Z"},
-                 {"updated_at": "2022-03-01T09:00:00Z"},
-                 {"updated_at": "2022-03-01T10:00:00Z"},
-                 {"updated_at": "2022-04-01T10:00:00Z"},
-             ],
-         },
-         {
-             "updated_at": "2022-05-01T10:00:00Z",
-             "columns": [
-                 {"updated_at": "2022-01-01T10:00:00Z"},
-                 {"updated_at": "2022-05-01T10:00:00Z"},
-             ],
-         },
-         {
-             "updated_at": "2022-06-01T10:00:00Z",
-             "columns": [{"updated_at": "2022-06-01T10:00:00Z"}],
-         },
-     ]
-
-     ProjectsResponsesAPI.register(data)
-
-     projects_stream._session.cache.clear()
-     stream._session.cache.clear()
-     records = read_incremental(stream, stream_state=stream_state)
-     assert records == [
-         {"id": 24, "name": "column_24", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T10:00:00Z"},
-         {"id": 41, "name": "column_41", "project_id": 4, "repository": "organization/repository", "updated_at": "2022-06-01T10:00:00Z"},
-     ]
-
-     assert stream_state == {
-         "organization/repository": {
-             "2": {"updated_at": "2022-04-01T10:00:00Z"},
-             "3": {"updated_at": "2022-05-01T10:00:00Z"},
-             "4": {"updated_at": "2022-06-01T10:00:00Z"},
-         }
-     }
-
-
- @responses.activate
- def test_stream_project_cards():
-
-     repository_args_with_start_date = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": 100,
-         "start_date": "2022-03-01T00:00:00Z",
-     }
-
-     projects_stream = Projects(**repository_args_with_start_date)
-     project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
-     stream = ProjectCards(project_columns_stream, **repository_args_with_start_date)
-
-     data = [
-         {
-             "updated_at": "2022-01-01T00:00:00Z",
-         },
-         {
-             "updated_at": "2022-06-01T00:00:00Z",
-             "columns": [
-                 {
-                     "updated_at": "2022-04-01T00:00:00Z",
-                     "cards": [
-                         {"updated_at": "2022-03-01T00:00:00Z"},
-                         {"updated_at": "2022-04-01T00:00:00Z"},
-                     ],
-                 },
-                 {"updated_at": "2022-05-01T09:00:00Z"},
-                 {
-                     "updated_at": "2022-06-01T00:00:00Z",
-                     "cards": [
-                         {"updated_at": "2022-05-01T00:00:00Z"},
-                         {"updated_at": "2022-06-01T00:00:00Z"},
-                     ],
-                 },
-             ],
-         },
-         {
-             "updated_at": "2022-05-01T00:00:00Z",
-             "columns": [
-                 {"updated_at": "2022-01-01T00:00:00Z"},
-                 {
-                     "updated_at": "2022-05-01T00:00:00Z",
-                     "cards": [
-                         {"updated_at": "2022-02-01T00:00:00Z"},
-                         {"updated_at": "2022-05-01T00:00:00Z"},
-                     ],
-                 },
-             ],
-         },
-     ]
-
-     ProjectsResponsesAPI.register(data)
-
-     stream_state = {}
-
-     projects_stream._session.cache.clear()
-     project_columns_stream._session.cache.clear()
-     records = read_incremental(stream, stream_state=stream_state)
-
-     assert records == [
-         {
-             "column_id": 21,
-             "id": 212,
-             "name": "card_212",
-             "project_id": 2,
-             "repository": "organization/repository",
-             "updated_at": "2022-04-01T00:00:00Z",
-         },
-         {
-             "column_id": 23,
-             "id": 231,
-             "name": "card_231",
-             "project_id": 2,
-             "repository": "organization/repository",
-             "updated_at": "2022-05-01T00:00:00Z",
-         },
-         {
-             "column_id": 23,
-             "id": 232,
-             "name": "card_232",
-             "project_id": 2,
-             "repository": "organization/repository",
-             "updated_at": "2022-06-01T00:00:00Z",
-         },
-         {
-             "column_id": 32,
-             "id": 322,
-             "name": "card_322",
-             "project_id": 3,
-             "repository": "organization/repository",
-             "updated_at": "2022-05-01T00:00:00Z",
-         },
-     ]
-
-
- @responses.activate
- def test_stream_comments():
-
-     repository_args_with_start_date = {
-         "repositories": ["organization/repository", "airbytehq/airbyte"],
-         "page_size_for_large_streams": 2,
-         "start_date": "2022-02-02T10:10:01Z",
-     }
-
-     stream = Comments(**repository_args_with_start_date)
-
-     data = [
-         {"id": 1, "updated_at": "2022-02-02T10:10:02Z"},
-         {"id": 2, "updated_at": "2022-02-02T10:10:04Z"},
-         {"id": 3, "updated_at": "2022-02-02T10:12:06Z"},
-         {"id": 4, "updated_at": "2022-02-02T10:12:08Z"},
-         {"id": 5, "updated_at": "2022-02-02T10:12:10Z"},
-         {"id": 6, "updated_at": "2022-02-02T10:12:12Z"},
-     ]
-
-     api_url = "https://api.github.com/repos/organization/repository/issues/comments"
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[0:2],
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[1:3],
-         headers={
-             "Link": '<https://api.github.com/repos/organization/repository/issues/comments?per_page=2&since=2022-02-02T10%3A10%3A04Z&page=2>; rel="next"'
-         },
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[3:5],
-         headers={
-             "Link": '<https://api.github.com/repos/organization/repository/issues/comments?per_page=2&since=2022-02-02T10%3A10%3A04Z&page=3>; rel="next"'
-         },
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "2", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[5:],
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "3", "per_page": "2"})],
-     )
-
-     data = [
-         {"id": 1, "updated_at": "2022-02-02T10:11:02Z"},
-         {"id": 2, "updated_at": "2022-02-02T10:11:04Z"},
-         {"id": 3, "updated_at": "2022-02-02T10:13:06Z"},
-         {"id": 4, "updated_at": "2022-02-02T10:13:08Z"},
-         {"id": 5, "updated_at": "2022-02-02T10:13:10Z"},
-         {"id": 6, "updated_at": "2022-02-02T10:13:12Z"},
-     ]
-
-     api_url = "https://api.github.com/repos/airbytehq/airbyte/issues/comments"
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[0:2],
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[1:3],
-         headers={
-             "Link": '<https://api.github.com/repos/airbytehq/airbyte/issues/comments?per_page=2&since=2022-02-02T10%3A11%3A04Z&page=2>; rel="next"'
-         },
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[3:5],
-         headers={
-             "Link": '<https://api.github.com/repos/airbytehq/airbyte/issues/comments?per_page=2&since=2022-02-02T10%3A11%3A04Z&page=3>; rel="next"'
-         },
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "2", "per_page": "2"})],
-     )
-
-     responses.add(
-         "GET",
-         api_url,
-         json=data[5:],
-         match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "3", "per_page": "2"})],
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-     assert records == [
-         {"id": 1, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:02Z"},
-         {"id": 2, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:04Z"},
-         {"id": 1, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:02Z"},
-         {"id": 2, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:04Z"},
-     ]
-
-     assert stream_state == {
-         "airbytehq/airbyte": {"updated_at": "2022-02-02T10:11:04Z"},
-         "organization/repository": {"updated_at": "2022-02-02T10:10:04Z"},
-     }
-
-     records = read_incremental(stream, stream_state)
-     assert records == [
-         {"id": 3, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:06Z"},
-         {"id": 4, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:08Z"},
-         {"id": 5, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:10Z"},
-         {"id": 6, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:12Z"},
-         {"id": 3, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:06Z"},
-         {"id": 4, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:08Z"},
-         {"id": 5, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:10Z"},
-         {"id": 6, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:12Z"},
-     ]
-     assert stream_state == {
-         "airbytehq/airbyte": {"updated_at": "2022-02-02T10:13:12Z"},
-         "organization/repository": {"updated_at": "2022-02-02T10:12:12Z"},
-     }
-
-
- @responses.activate
- def test_streams_read_full_refresh():
-
-     repository_args = {
-         "repositories": ["organization/repository"],
-         "page_size_for_large_streams": 100,
-     }
-
-     repository_args_with_start_date = {**repository_args, "start_date": "2022-02-01T00:00:00Z"}
-
-     def get_json_response(cursor_field):
-         cursor_field = cursor_field or "updated_at"
-         return [
-             {"id": 1, cursor_field: "2022-02-01T00:00:00Z"},
-             {"id": 2, cursor_field: "2022-02-02T00:00:00Z"},
-         ]
-
-     def get_records(cursor_field):
-         cursor_field = cursor_field or "updated_at"
-         return [
-             {"id": 1, cursor_field: "2022-02-01T00:00:00Z", "repository": "organization/repository"},
-             {"id": 2, cursor_field: "2022-02-02T00:00:00Z", "repository": "organization/repository"},
-         ]
-
-     for cls, url in [
-         (Releases, "https://api.github.com/repos/organization/repository/releases"),
-         (IssueEvents, "https://api.github.com/repos/organization/repository/issues/events"),
-         (IssueMilestones, "https://api.github.com/repos/organization/repository/milestones"),
-         (CommitComments, "https://api.github.com/repos/organization/repository/comments"),
-         (Deployments, "https://api.github.com/repos/organization/repository/deployments"),
-     ]:
-         stream = cls(**repository_args_with_start_date)
-         responses.add("GET", url, json=get_json_response(stream.cursor_field))
-         records = list(read_full_refresh(stream))
-         assert records == get_records(stream.cursor_field)[1:2]
-
-     for cls, url in [
-         (Tags, "https://api.github.com/repos/organization/repository/tags"),
-         (IssueLabels, "https://api.github.com/repos/organization/repository/labels"),
-         (Collaborators, "https://api.github.com/repos/organization/repository/collaborators"),
-         (Branches, "https://api.github.com/repos/organization/repository/branches"),
-     ]:
-         stream = cls(**repository_args)
-         responses.add("GET", url, json=get_json_response(stream.cursor_field))
-         records = list(read_full_refresh(stream))
-         assert records == get_records(stream.cursor_field)
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/organization/repository/stargazers",
-         json=[
-             {"starred_at": "2022-02-01T00:00:00Z", "user": {"id": 1}},
-             {"starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}},
-         ],
-     )
-
-     stream = Stargazers(**repository_args_with_start_date)
-     records = list(read_full_refresh(stream))
-     assert records == [{"repository": "organization/repository", "starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}, "user_id": 2}]
-
-
- @responses.activate
- def test_stream_reviews_incremental_read():
-
-     repository_args_with_start_date = {
-         "start_date": "2000-01-01T00:00:00Z",
-         "page_size_for_large_streams": 30,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = Reviews(**repository_args_with_start_date)
-     stream.page_size = 2
-
-     f = Path(__file__).parent / "responses/graphql_reviews_responses.json"
-     response_objects = json.load(open(f))
-
-     def request_callback(request):
-         return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0)))
-
-     responses.add_callback(
-         responses.POST,
-         "https://api.github.com/graphql",
-         callback=request_callback,
-         content_type="application/json",
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-     assert [r["id"] for r in records] == [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008]
-     assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:01Z"}}
-     assert len(responses.calls) == 4
-
-     responses.calls.reset()
-     records = read_incremental(stream, stream_state)
-     assert [r["id"] for r in records] == [1000, 1007, 1009]
-     assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:02Z"}}
-     assert len(responses.calls) == 4
-
-
- @responses.activate
- def test_stream_team_members_full_refresh(caplog, rate_limit_mock_response):
-     organization_args = {"organizations": ["org1"]}
-     repository_args = {"repositories": [], "page_size_for_large_streams": 100}
-
-     responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"slug": "team1"}, {"slug": "team2"}])
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/members", json=[{"login": "login1"}, {"login": "login2"}])
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login1", json={"username": "login1"})
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login2", json={"username": "login2"})
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/members", json=[{"login": "login2"}, {"login": "login3"}])
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login2", json={"username": "login2"})
-     responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login3", status=requests.codes.NOT_FOUND)
-
-     teams_stream = Teams(**organization_args)
-     stream = TeamMembers(parent=teams_stream, **repository_args)
-     teams_stream._session.cache.clear()
-     records = list(read_full_refresh(stream))
-
-     assert records == [
-         {"login": "login1", "organization": "org1", "team_slug": "team1"},
-         {"login": "login2", "organization": "org1", "team_slug": "team1"},
-         {"login": "login2", "organization": "org1", "team_slug": "team2"},
-         {"login": "login3", "organization": "org1", "team_slug": "team2"},
-     ]
-
-     stream = TeamMemberships(parent=stream, **repository_args)
-     records = list(read_full_refresh(stream))
-
-     assert records == [
-         {"username": "login1", "organization": "org1", "team_slug": "team1"},
-         {"username": "login2", "organization": "org1", "team_slug": "team1"},
-         {"username": "login2", "organization": "org1", "team_slug": "team2"},
-     ]
-     expected_message = "Syncing `TeamMemberships` stream for organization `org1`, team `team2` and user `login3` isn't available: User has no team membership. Skipping..."
-     assert expected_message in caplog.messages
-
-
- @responses.activate
- def test_stream_commit_comment_reactions_incremental_read():
-
-     repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100}
-     stream = CommitCommentReactions(**repository_args)
-     stream._parent_stream._session.cache.clear()
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments",
-         json=[
-             {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
-             {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
-         ],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments/55538825/reactions",
-         json=[
-             {"id": 154935429, "created_at": "2022-01-01T15:00:00Z"},
-             {"id": 154935430, "created_at": "2022-01-01T16:00:00Z"},
-         ],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
-         json=[{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}],
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-
-     assert stream_state == {
-         "airbytehq/integration-test": {
-             "55538825": {"created_at": "2022-01-01T16:00:00Z"},
-             "55538826": {"created_at": "2022-01-01T17:00:00Z"},
-         }
-     }
-
-     assert records == [
-         {"id": 154935429, "comment_id": 55538825, "created_at": "2022-01-01T15:00:00Z", "repository": "airbytehq/integration-test"},
-         {"id": 154935430, "comment_id": 55538825, "created_at": "2022-01-01T16:00:00Z", "repository": "airbytehq/integration-test"},
-         {"id": 154935431, "comment_id": 55538826, "created_at": "2022-01-01T17:00:00Z", "repository": "airbytehq/integration-test"},
-     ]
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments",
-         json=[
-             {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
-             {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
-             {"id": 55538827, "updated_at": "2022-02-01T15:00:00Z"},
-         ],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
-         json=[
-             {"id": 154935431, "created_at": "2022-01-01T17:00:00Z"},
-             {"id": 154935432, "created_at": "2022-02-01T16:00:00Z"},
-         ],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/airbytehq/integration-test/comments/55538827/reactions",
-         json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}],
-     )
-
-     stream._parent_stream._session.cache.clear()
-     records = read_incremental(stream, stream_state)
-
-     assert records == [
-         {"id": 154935432, "comment_id": 55538826, "created_at": "2022-02-01T16:00:00Z", "repository": "airbytehq/integration-test"},
-         {"id": 154935433, "comment_id": 55538827, "created_at": "2022-02-01T17:00:00Z", "repository": "airbytehq/integration-test"},
-     ]
-
-
- @responses.activate
- def test_stream_workflow_runs_read_incremental(monkeypatch):
-
-     repository_args_with_start_date = {
-         "repositories": ["org/repos"],
-         "page_size_for_large_streams": 30,
-         "start_date": "2022-01-01T00:00:00Z",
-     }
-
-     monkeypatch.setattr(constants, "DEFAULT_PAGE_SIZE", 1)
-     stream = WorkflowRuns(**repository_args_with_start_date)
-
-     data = [
-         {"id": 4, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z", "repository": {"full_name": "org/repos"}},
-         {"id": 3, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z", "repository": {"full_name": "org/repos"}},
-         {"id": 2, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z", "repository": {"full_name": "org/repos"}},
-         {"id": 1, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z", "repository": {"full_name": "org/repos"}},
-     ]
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[0:1]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=2>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[1:2]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=3>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[2:3]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=4>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[3:4]},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)],
-     )
-
-     state = {}
-     records = read_incremental(stream, state)
-     assert state == {"org/repos": {"updated_at": "2022-02-05T00:00:00Z"}}
-
-     assert records == [
-         {"id": 4, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z"},
-         {"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z"},
-         {"id": 2, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z"},
-         {"id": 1, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z"},
-     ]
-
-     assert len(responses.calls) == 4
-
-     data.insert(
-         0,
-         {
-             "id": 5,
-             "created_at": "2022-02-07T00:00:00Z",
-             "updated_at": "2022-02-07T00:00:00Z",
-             "repository": {"full_name": "org/repos"},
-         },
-     )
-
-     data[2]["updated_at"] = "2022-02-08T00:00:00Z"
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[0:1]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=2>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[1:2]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=3>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[2:3]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=4>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)],
-     )
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repos/actions/runs",
-         json={"total_count": len(data), "workflow_runs": data[3:4]},
-         headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=5>; rel="next"'},
-         match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)],
-     )
-
-     responses.calls.reset()
-     records = read_incremental(stream, state)
-
-     assert state == {"org/repos": {"updated_at": "2022-02-08T00:00:00Z"}}
-     assert records == [
-         {"id": 5, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-07T00:00:00Z", "updated_at": "2022-02-07T00:00:00Z"},
-         {"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-02-08T00:00:00Z"},
-     ]
-
-     assert len(responses.calls) == 4
-
-
- @responses.activate
- def test_stream_workflow_jobs_read():
-
-     repository_args = {
-         "repositories": ["org/repo"],
-         "page_size_for_large_streams": 100,
-     }
-     repository_args_with_start_date = {**repository_args, "start_date": "2022-09-02T09:05:00Z"}
-
-     workflow_runs_stream = WorkflowRuns(**repository_args_with_start_date)
-     stream = WorkflowJobs(workflow_runs_stream, **repository_args_with_start_date)
-
-     workflow_runs = [
-         {
-             "id": 1,
-             "created_at": "2022-09-02T09:00:00Z",
-             "updated_at": "2022-09-02T09:10:02Z",
-             "repository": {"full_name": "org/repo"},
-         },
-         {
-             "id": 2,
-             "created_at": "2022-09-02T09:06:00Z",
-             "updated_at": "2022-09-02T09:08:00Z",
-             "repository": {"full_name": "org/repo"},
-         },
-     ]
-
-     workflow_jobs_1 = [
-         {"id": 1, "completed_at": "2022-09-02T09:02:00Z", "run_id": 1},
-         {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1},
-         {"id": 5, "completed_at": None, "run_id": 1},
-     ]
-
-     workflow_jobs_2 = [
-         {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2},
-         {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2},
-     ]
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repo/actions/runs",
-         json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs},
-     )
-     responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1})
-     responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2})
-
-     state = {}
-     records = read_incremental(stream, state)
-     assert state == {"org/repo": {"completed_at": "2022-09-02T09:10:00Z"}}
-
-     assert records == [
-         {"completed_at": "2022-09-02T09:10:00Z", "id": 4, "repository": "org/repo", "run_id": 1},
-         {"completed_at": "2022-09-02T09:07:00Z", "id": 2, "repository": "org/repo", "run_id": 2},
-         {"completed_at": "2022-09-02T09:08:00Z", "id": 3, "repository": "org/repo", "run_id": 2},
-     ]
-
-     assert len(responses.calls) == 3
-
-     workflow_jobs_1[2]["completed_at"] = "2022-09-02T09:12:00Z"
-     workflow_runs[0]["updated_at"] = "2022-09-02T09:12:01Z"
-     workflow_runs.append(
-         {
-             "id": 3,
-             "created_at": "2022-09-02T09:14:00Z",
-             "updated_at": "2022-09-02T09:15:00Z",
-             "repository": {"full_name": "org/repo"},
-         }
-     )
-     workflow_jobs_3 = [
-         {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3},
-         {"id": 7, "completed_at": None, "run_id": 3},
-     ]
-
-     responses.add(
-         "GET",
-         "https://api.github.com/repos/org/repo/actions/runs",
-         json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs},
-     )
-     responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1})
-     responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2})
-     responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/3/jobs", json={"jobs": workflow_jobs_3})
-
-     responses.calls.reset()
-     records = read_incremental(stream, state)
-
-     assert state == {"org/repo": {"completed_at": "2022-09-02T09:15:00Z"}}
-     assert records == [
-         {"completed_at": "2022-09-02T09:12:00Z", "id": 5, "repository": "org/repo", "run_id": 1},
-         {"completed_at": "2022-09-02T09:15:00Z", "id": 6, "repository": "org/repo", "run_id": 3},
-     ]
-
-     records = list(read_full_refresh(stream))
-     assert records == [
-         {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1, "repository": "org/repo"},
-         {"id": 5, "completed_at": "2022-09-02T09:12:00Z", "run_id": 1, "repository": "org/repo"},
-         {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2, "repository": "org/repo"},
-         {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2, "repository": "org/repo"},
-         {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3, "repository": "org/repo"},
-     ]
-
-
- @responses.activate
- def test_stream_pull_request_comment_reactions_read():
-
-     repository_args_with_start_date = {
-         "start_date": "2022-01-01T00:00:00Z",
-         "page_size_for_large_streams": 2,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = PullRequestCommentReactions(**repository_args_with_start_date)
-     stream.page_size = 2
-
-     f = Path(__file__).parent / "responses/pull_request_comment_reactions.json"
-     response_objects = json.load(open(f))
-
-     def request_callback(request):
-         return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0)))
-
-     responses.add_callback(
-         responses.POST,
-         "https://api.github.com/graphql",
-         callback=request_callback,
-         content_type="application/json",
-     )
-
-     stream_state = {}
-     records = read_incremental(stream, stream_state)
-     records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records]
-     assert records == [
-         {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction1"},
-         {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction2"},
-         {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction3"},
-         {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction4"},
-         {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction5"},
-         {"comment_id": "comment5", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction6"},
-         {"comment_id": "comment7", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction7"},
-         {"comment_id": "comment8", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction8"},
-     ]
-
-     assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-01T00:00:01Z"}}
-     records = read_incremental(stream, stream_state)
-     records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records]
-
-     assert records == [
-         {"comment_id": "comment2", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction9"},
-         {"comment_id": "comment8", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction10"},
-     ]
-
-     assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-02T00:00:01Z"}}
-
-
- @responses.activate
- def test_stream_projects_v2_graphql_retry(rate_limit_mock_response):
-     repository_args_with_start_date = {
-         "start_date": "2022-01-01T00:00:00Z",
-         "page_size_for_large_streams": 20,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = ProjectsV2(**repository_args_with_start_date)
-     resp = responses.add(
-         responses.POST,
-         "https://api.github.com/graphql",
-         json={"errors": "not found"},
-         status=200,
-     )
-
-     with patch.object(stream, "backoff_time", return_value=0.01), pytest.raises(UserDefinedBackoffException):
-         read_incremental(stream, stream_state={})
-     assert resp.call_count == stream.max_retries + 1
-
-
- @responses.activate
- def test_stream_projects_v2_graphql_query():
-     repository_args_with_start_date = {
-         "start_date": "2022-01-01T00:00:00Z",
-         "page_size_for_large_streams": 20,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = ProjectsV2(**repository_args_with_start_date)
-     query = stream.request_body_json(stream_state={}, stream_slice={"repository": "airbytehq/airbyte"})
-     responses.add(
-         responses.POST,
-         "https://api.github.com/graphql",
-         json=json.load(open(Path(__file__).parent / "responses/projects_v2_response.json")),
-     )
-     f = Path(__file__).parent / "projects_v2_pull_requests_query.json"
-     expected_query = json.load(open(f))
-
-     records = list(read_full_refresh(stream))
-     assert query == expected_query
-     assert records[0].get("owner_id")
-     assert records[0].get("repository")
-
-
- @responses.activate
- def test_stream_contributor_activity_parse_empty_response(caplog):
-     repository_args = {
-         "page_size_for_large_streams": 20,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = ContributorActivity(**repository_args)
-     resp = responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/airbyte/stats/contributors",
-         body="",
-         status=204,
-     )
-     records = list(read_full_refresh(stream))
-     expected_message = "Empty response received for contributor_activity stats in repository airbytehq/airbyte"
-     assert resp.call_count == 1
-     assert records == []
-     assert expected_message in caplog.messages
-
-
- @responses.activate
- def test_stream_contributor_activity_accepted_response(caplog, rate_limit_mock_response):
-     responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100",
-         json={"full_name": "airbytehq/test_airbyte"},
-         status=200,
-     )
-     responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100",
-         json={"full_name": "airbytehq/test_airbyte", "default_branch": "default_branch"},
-         status=200,
-     )
-     responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/test_airbyte/branches?per_page=100",
-         json={},
-         status=200,
-     )
-     resp = responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/test_airbyte/stats/contributors?per_page=100",
-         body="",
-         status=202,
-     )
-
-     source = SourceGithub()
-     configured_catalog = {
-         "streams": [
-             {
-                 "stream": {"name": "contributor_activity", "json_schema": {}, "supported_sync_modes": ["full_refresh"], "source_defined_primary_key": [["id"]]},
-                 "sync_mode": "full_refresh",
-                 "destination_sync_mode": "overwrite"
-             }
-         ]
-     }
-     catalog = ConfiguredAirbyteCatalog.parse_obj(configured_catalog)
-     config = {"access_token": "test_token", "repository": "airbytehq/test_airbyte"}
-     logger_mock = MagicMock()
-
-     with patch("time.sleep", return_value=0):
-         records = list(source.read(config=config, logger=logger_mock, catalog=catalog, state={}))
-
-     assert records[2].log.message == "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/test_airbyte`."
-     assert resp.call_count == 6
-
-
- @responses.activate
- def test_stream_contributor_activity_parse_response():
-     repository_args = {
-         "page_size_for_large_streams": 20,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = ContributorActivity(**repository_args)
-     responses.add(
-         responses.GET,
-         "https://api.github.com/repos/airbytehq/airbyte/stats/contributors",
-         json=json.load(open(Path(__file__).parent / "responses/contributor_activity_response.json")),
-     )
-     records = list(read_full_refresh(stream))
-     assert len(records) == 1
-
-
- @responses.activate
- def test_issues_timeline_events():
-     repository_args = {
-         "repositories": ["airbytehq/airbyte"],
-         "page_size_for_large_streams": 20,
-     }
-     response_file = Path(__file__).parent / "responses/issue_timeline_events.json"
-     response_json = json.load(open(response_file))
-     responses.add(responses.GET, "https://api.github.com/repos/airbytehq/airbyte/issues/1/timeline?per_page=100", json=response_json)
-     expected_file = Path(__file__).parent / "responses/issue_timeline_events_response.json"
-     expected_records = json.load(open(expected_file))
-
-     stream = IssueTimelineEvents(**repository_args)
-     records = list(stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice={"repository": "airbytehq/airbyte", "number": 1}))
-     assert expected_records == records
-
-
- @responses.activate
- def test_pull_request_stats():
-     repository_args = {
-         "page_size_for_large_streams": 10,
-         "repositories": ["airbytehq/airbyte"],
-     }
-     stream = PullRequestStats(**repository_args)
-     query = stream.request_body_json(stream_state={}, stream_slice={"repository": "airbytehq/airbyte"})
-     responses.add(
-         responses.POST,
-         "https://api.github.com/graphql",
-         json=json.load(open(Path(__file__).parent / "responses/pull_request_stats_response.json")),
-     )
-     f = Path(__file__).parent / "pull_request_stats_query.json"
-     expected_query = json.load(open(f))
-
-     list(read_full_refresh(stream))
-     assert query == expected_query