dcicutils 7.6.0.2b10__py3-none-any.whl → 7.7.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,570 +0,0 @@
1
import datetime
import git
import io
import json
import os
import re
import warnings

from collections import defaultdict
from dcicutils.diff_utils import DiffManager
from dcicutils.lang_utils import n_of
from dcicutils.misc_utils import PRINT, ignored, environ_bool
from typing import Dict, Iterator, List, Optional, Set, Type
14
-
15
-
16
# When the DEBUG_CONTRIBUTIONS environment variable is enabled, extra diagnostic output is PRINTed
# while contributor data is loaded and reconciled.
DEBUG_CONTRIBUTIONS = environ_bool("DEBUG_CONTRIBUTIONS")

# Recognizes GitHub "noreply" addresses. An optional leading run of digits followed by '+'
# is skipped, and group 1 captures whatever remains before '@users.noreply.github.com'.
GITHUB_USER_REGEXP = re.compile(
    '(?:[0-9]+[+])?'
    '(.*)'
    '[@]users[.]noreply[.]github[.]com'
)

# The directory under which repositories are looked up by name. It comes from the PROJECT_HOME
# environment variable when that is set, and otherwise is the parent of the current directory.
PROJECT_HOME = os.environ.get(
    'PROJECT_HOME',
    os.path.dirname(os.path.abspath(os.curdir)),
)
20
-
21
-
22
class GitAnalysis:
    """
    Helpers for finding a git repository under PROJECT_HOME and for rendering its commits,
    authors and co-authors as JSON-compatible dictionaries.
    """

    @classmethod
    def find_repo(cls, repo_name: str) -> "git.Repo":
        """
        Opens the repository located at PROJECT_HOME/<repo_name>.

        :param repo_name: the name of a directory directly under PROJECT_HOME
        :return: a git.Repo object for that directory
        """
        return git.Repo(os.path.join(PROJECT_HOME, repo_name))

    @classmethod
    def git_commits(cls, repo_name) -> Iterator[Dict]:
        """
        Lazily yields one dictionary per commit of the named repo, in the order that
        repo.iter_commits() produces them. Each dictionary has the shape described
        in json_for_commit.

        Note that this is a generator of dictionaries, not a list of git.Commit objects.

        :param repo_name: the name of a directory directly under PROJECT_HOME
        """
        repo = cls.find_repo(repo_name)
        for commit in repo.iter_commits():
            yield cls.json_for_commit(commit)

    @classmethod
    def author_name(cls, actor: "git.Actor") -> str:
        """
        Returns the actor's name, or if that is missing or empty, the part of
        the actor's email address that precedes the first '@'.
        """
        return actor.name or actor.email.split('@')[0]

    @classmethod
    def json_for_actor(cls, actor: "git.Actor") -> Dict:
        """
        Returns a dictionary with keys "name" (see author_name) and "email" for the given actor.
        """
        return {
            "name": cls.author_name(actor),
            "email": actor.email,
        }

    @classmethod
    def json_for_commit(cls, commit: "git.Commit") -> Dict:
        """
        Returns a dictionary describing the given commit, with these keys:

        * 'commit' - the commit's hexsha
        * 'date' - the commit's committed_datetime, in ISO format
        * 'author' - the author, as formatted by json_for_actor
        * 'coauthors' - a list of the commit's co_authors, each as formatted by json_for_actor
        * 'message' - the commit message
        """
        return {
            'commit': commit.hexsha,
            'date': commit.committed_datetime.isoformat(),
            'author': cls.json_for_actor(commit.author),
            'coauthors': [cls.json_for_actor(co_author) for co_author in commit.co_authors],
            'message': commit.message,
        }
57
-
58
-
59
class Contributor:
    """
    A single contributor, known by a non-empty set of email addresses and a (possibly empty)
    set of names, optionally with one name designated as the primary name.
    """

    @classmethod
    def create(cls, *, author: "git.Actor") -> 'Contributor':
        """
        Creates a contributor from a git actor, using the actor's email and
        the name computed by GitAnalysis.author_name.
        """
        return cls(email=author.email, name=GitAnalysis.author_name(author))

    def __init__(self, *, email: Optional[str] = None, name: Optional[str] = None,
                 emails: Optional[Set[str]] = None, names: Optional[Set[str]] = None,
                 primary_name: Optional[str] = None):
        """
        Exactly one of email= or emails= must be given. At most one of name= or names= may be given,
        and both may be omitted. The primary_name is optional; if it is not given, the primary_name
        property computes one heuristically from the available names.

        The given emails and names collections are copied, so later changes to this contributor
        never alter the caller's collections.

        :raises ValueError: if neither or both of email=/emails= are given, or if both name= and names= are.
        """
        if not email and not emails:
            raise ValueError("One of email= or emails= is required.")
        if email and emails:
            raise ValueError("Only one of email= and emails= may be provided.")
        if name and names:
            raise ValueError("Only one of name= and names= may be provided.")
        self.emails: Set[str] = set(emails) if emails else {email}
        if names:
            self.names: Set[str] = set(names)
        else:
            self.names: Set[str] = {name} if name else set()
        self._primary_name = primary_name
        if primary_name:
            # The primary name is always one of the contributor's names.
            self.names.add(primary_name)

    def __str__(self):
        maybe_primary = f" {self._primary_name!r}" if self._primary_name else ""
        emails = ",".join(sorted(map(repr, self.emails), key=str.lower))
        names = ",".join(sorted(map(repr, self.names), key=str.lower))
        return f"<{self.__class__.__name__}{maybe_primary} emails={emails} names={names} {id(self)}>"

    def copy(self):
        """
        Returns a new Contributor with copies of this one's emails and names and the same explicit primary name.
        """
        return Contributor(emails=self.emails.copy(), names=self.names.copy(), primary_name=self._primary_name)

    @property
    def primary_name(self):
        """
        The explicitly assigned primary name if there is one, and otherwise the name that ranks highest
        according to name_variation_spelling_sort_key.

        :raises IndexError: if there is no explicit primary name and the contributor has no names at all.
        """
        if self._primary_name:
            return self._primary_name
        return sorted(self.names, key=self.name_variation_spelling_sort_key, reverse=True)[0]

    def set_primary_name(self, primary_name: str):
        """
        Makes the given name the primary name, adding it to the set of names if necessary.
        """
        self.names.add(primary_name)
        self._primary_name = primary_name

    def notice_mention_as(self, *, email: Optional[str] = None, name: Optional[str] = None):
        """
        Records that this contributor has been seen using the given email and/or name.
        Arguments that are None are ignored.
        """
        if email is not None:
            self.emails.add(email)
        if name is not None:
            self.names.add(name)

    def as_dict(self):
        """
        Returns a dictionary with keys "emails" and "names", each a sorted list.
        """
        # Note that the sort is case-sensitive alphabetic just because that's easiest here.
        # Making it be case-insensitive would require a special case for non-strings,
        # and all we really care about is some easy degree of determinism for testing.
        return {
            "emails": sorted(self.emails),
            "names": sorted(self.names),
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'Contributor':
        """
        Builds a contributor from a dictionary of the kind returned by as_dict.
        An empty "names" list is allowed, so that the result of as_dict can always be read back.

        :raises KeyError: if either "emails" or "names" is missing.
        :raises ValueError: if "emails" is empty.
        """
        emails = data["emails"]
        names = data["names"]
        return cls(emails=set(emails), names=set(names))

    @classmethod
    def name_variation_spelling_sort_key(cls, name):
        """
        Returns a sort key under which better candidates for a primary name compare greater.
        """
        return (
            ' ' in name,  # we prefer names like 'jane doe' over jdoe123
            len(name),  # longer names are usually more formal; william smith vs will smith
            name,  # alphabetical order is not about one name being better; it makes sort results deterministic
        )
138
-
139
-
140
# Type alias for an index that maps a string key (a contributor's name or email address,
# depending on the index) to a Contributor. The alias is itself Optional, so None is allowed.
ContributorIndex = Optional[Dict[str, Contributor]]
141
-
142
-
143
class BasicContributions(GitAnalysis):
    """
    Holds contributor indexes for a repository and knows how to convert them to and from
    the JSON-compatible form stored in the repository's CONTRIBUTORS.json file.
    """

    VERBOSE = False

    CONTRIBUTORS_CACHE_FILE = 'CONTRIBUTORS.json'

    def __init__(self, *, repo: Optional[str] = None,
                 verbose: Optional[bool] = None):
        """
        :param repo: the name of the repository directory under PROJECT_HOME.
            If not given, the name of the current directory is used.
        :param verbose: if given, overrides the class default (VERBOSE).
        """
        self.email_timestamps: Dict[str, datetime.datetime] = {}
        self.name_timestamps: Dict[str, datetime.datetime] = {}
        if not repo:
            # Doing it this way gets around an ambiguity about '/foo/' vs '/foo' since both
            # os.path.join('/foo/', 'bar') and os.path.join('/foo', 'bar') yield '/foo/bar',
            # and from there one can do os.path.basename(os.path.dirname(...)) to get 'foo' out.
            cache_file = os.path.join(os.path.abspath(os.path.curdir), self.CONTRIBUTORS_CACHE_FILE)
            repo = os.path.basename(os.path.dirname(cache_file))
        self.repo: str = repo
        self.forked_at: Optional[datetime.datetime] = None
        self.contributors_by_name: Optional[ContributorIndex] = None
        self.contributors_by_email: Optional[ContributorIndex] = None
        self.pre_fork_contributors_by_email: Optional[ContributorIndex] = None
        self.pre_fork_contributors_by_name: Optional[ContributorIndex] = None
        self.loaded_contributor_data = None
        self.cache_discrepancies: Optional[dict] = None
        self.verbose = self.VERBOSE if verbose is None else verbose

    def contributors_json_file(self):
        """
        Returns the name of the CONTRIBUTORS.json file for the repo associated with this class.
        """
        return os.path.join(PROJECT_HOME, self.repo, self.CONTRIBUTORS_CACHE_FILE)

    def existing_contributors_json_file(self):
        """
        Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists,
        or None if there is no such file.
        """
        file = self.contributors_json_file()
        return file if os.path.exists(file) else None

    @classmethod
    def notice_reference_time(cls, key: str, timestamp: datetime.datetime, timestamps: Dict[str, datetime.datetime]):
        """
        Records the given timestamp for the key in the given table, unless the table
        already holds a timestamp for that key that is at least as recent.
        """
        reference_timestamp: Optional[datetime.datetime] = timestamps.get(key)
        if not reference_timestamp or timestamp > reference_timestamp:
            timestamps[key] = timestamp

    def email_reference_time(self, email):
        """
        Returns the most recent timestamp recorded for the given email, or None if there is none.
        """
        return self.email_timestamps.get(email)

    def name_reference_time(self, name):
        """
        Returns the most recent timestamp recorded for the given name, or None if there is none.
        """
        return self.name_timestamps.get(name)

    @classmethod
    def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]):
        """
        Returns a copy of the given index in which each Contributor is replaced by its as_dict() form,
        or None if the index is None.
        """
        if contributor_index is None:
            return None
        return {
            key: contributor.as_dict()
            for key, contributor in contributor_index.items()
        }

    @classmethod
    def contributor_values_as_objects(cls, contributor_index: Optional[Dict]) -> Optional[ContributorIndex]:
        """
        Returns a copy of the given index in which each dictionary is replaced by the result of
        Contributor.from_dict, or None if the index is None.
        """
        if contributor_index is None:
            return None
        return {
            key: Contributor.from_dict(value)
            for key, value in contributor_index.items()
        }

    def checkpoint_state(self):
        """
        Returns a snapshot of the current state, in the same form as as_dict().
        """
        return self.as_dict()

    def as_dict(self):
        """
        Returns a JSON-compatible dictionary with whichever of the keys "forked_at",
        "pre_fork_contributors_by_name" and "contributors_by_name" have a value other than None.
        """
        data = {}

        if self.forked_at:
            data["forked_at"] = self.forked_at.isoformat()

        pre_fork_contributors_by_name = self.contributor_values_as_dicts(self.pre_fork_contributors_by_name)
        if pre_fork_contributors_by_name is not None:
            data["pre_fork_contributors_by_name"] = pre_fork_contributors_by_name

        contributors_by_name = self.contributor_values_as_dicts(self.contributors_by_name)
        if contributors_by_name is not None:
            data["contributors_by_name"] = contributors_by_name

        return data

    def save_contributor_data(self, filename: Optional[str] = None) -> str:
        """
        Writes as_dict() as indented JSON to the given file, which defaults to the repo's CONTRIBUTORS.json.

        :return: the name of the file that was written
        """
        if filename is None:
            filename = self.contributors_json_file()
        # The encoding is explicit so that the file does not depend on the platform's default encoding.
        with io.open(filename, 'w', encoding='utf-8') as fp:
            PRINT(json.dumps(self.as_dict(), indent=2), file=fp)
        return filename

    def repo_contributor_names(self, with_email=False):
        """
        Yields each key of contributors_by_name. If with_email is true, each is followed by
        a parenthesized, comma-separated list of that contributor's emails (see pretty_email).
        The emails are sorted so that the output is the same from run to run.
        """
        for name, contributor in self.contributors_by_name.items():
            if with_email:
                pretty_emails = ', '.join(self.pretty_email(email) for email in sorted(contributor.emails))
                yield f"{name} ({pretty_emails})"
            else:
                yield name

    def show_repo_contributors(self, analyze_discrepancies: bool = True, with_email: bool = True,
                               error_class: Optional[Type[BaseException]] = None):
        """
        PRINTs the repo's contributors, one per line. If analyze_discrepancies is true, this also reports
        a missing CONTRIBUTORS.json file or any recorded cache discrepancies.

        :param analyze_discrepancies: whether to check for a missing cache file or cache discrepancies
        :param with_email: whether to show email addresses along with names
        :param error_class: if given, a problem found by the analysis is raised as an error of this class
            (after any discrepancies have been PRINTed) rather than merely being PRINTed.
        """
        for author_name in self.repo_contributor_names(with_email=with_email):
            PRINT(author_name)
        if not analyze_discrepancies:
            return
        if not self.existing_contributors_json_file():
            message = f"Need to create a {self.CONTRIBUTORS_CACHE_FILE} file for {self.repo}."
            if error_class:
                raise error_class(message)
            PRINT(message)
        elif self.cache_discrepancies:
            message = "There are contributor cache discrepancies."
            PRINT(f"===== {message.rstrip('.').upper()} =====")
            for action, items in self.cache_discrepancies.items():
                PRINT(f"{action.replace('_', ' ').title()}:")
                for item in items:
                    PRINT(f" * {item}")
            if error_class:
                raise error_class(message)

    @classmethod
    def pretty_email(cls, email):
        """
        Abbreviates an address matching GITHUB_USER_REGEXP as '<user>@github'. Other addresses are returned unchanged.
        """
        m = GITHUB_USER_REGEXP.match(email)
        if m:
            return f"{m.group(1)}@github"
        return email

    @classmethod
    def get_contributors_json_from_file_cache(cls, filename):
        """
        Returns the parsed JSON content of the given file. If reading or parsing fails,
        a message naming the file is PRINTed and the original error is re-raised.
        """
        try:
            with io.open(filename, 'r', encoding='utf-8') as fp:
                data = json.load(fp)
        except Exception:
            PRINT(f"Error while reading data from {filename!r}.")
            raise
        return data

    @classmethod
    def contributor_index_by_primary_name(cls, contributors_by_name: ContributorIndex) -> ContributorIndex:
        """
        Given a by-name contributor index:

        * Makes sure that all contributors have only one name, indexed by the contributor's primary name
        * Sorts the resulting index using a case-insensitive alphabetic sort

        and then returns the result.

        :param contributors_by_name: a contributor index indexed by human name
        :return: a contributor index
        :raises Exception: if two different contributors in the index share a name
        """
        seen = set()
        # Maps each name seen so far to the contributor that owns it. The input index is not used for
        # this because a contributor's names are not necessarily all present as keys of that index.
        nickname_owners = {}
        contributor_items = []
        for contributor in contributors_by_name.values():
            if contributor in seen:
                continue
            for nickname in contributor.names:
                owner = nickname_owners.get(nickname)
                if owner is not None:
                    raise Exception(f"Name improperly shared between {contributor}"
                                    f" and {owner}")
                nickname_owners[nickname] = contributor
            contributor_items.append((contributor.primary_name, contributor))
            seen.add(contributor)
        # Having selected the primary names, now sort names ignoring case
        contributor_items.sort(key=lambda pair: pair[0].lower())
        return dict(contributor_items)

    @classmethod
    def by_email_from_by_name(cls, contributors_by_name_json):
        """
        Given an index whose values are either Contributor objects or dictionaries with an "emails" key,
        returns a new index that maps each email to the value it came from.

        :raises Exception: if the same email belongs to more than one distinct value
        """
        result = {}
        seen = set()
        for entry in contributors_by_name_json.values():
            # The same object may appear under several keys, so each is processed only once.
            seen_key = id(entry)
            if seen_key in seen:
                continue
            seen.add(seen_key)
            emails = entry.get("emails", []) if isinstance(entry, dict) else entry.emails
            for email in emails:
                if email in result:
                    raise Exception(f"email address {email} is used more than once.")
                result[email] = entry
        return result

    @classmethod
    def set_keys_as_primary_names(cls, contributors_by_name: Optional[ContributorIndex]):
        """
        Makes each key of the given index be the primary name of the contributor it maps to.
        Does nothing if the index is None.
        """
        if contributors_by_name is not None:
            for key, contributor in contributors_by_name.items():
                contributor.set_primary_name(key)
351
-
352
-
353
class Contributions(BasicContributions):
    """
    Contributor data for a repository, obtained by loading the repository's CONTRIBUTORS.json file
    (if there is one) and then reconciling that data against the repository's commit log.
    """

    def __init__(self, *, repo: Optional[str] = None,
                 exclude_fork: Optional[str] = None,
                 verbose: Optional[bool] = None):
        """
        :param repo: the name of the repository, passed along to BasicContributions
        :param exclude_fork: passed along to reconcile_contributors_with_github_log
        :param verbose: passed along to BasicContributions
        """
        super().__init__(repo=repo, verbose=verbose)
        existing_contributor_data_file = self.existing_contributors_json_file()
        if existing_contributor_data_file:
            # This will set .loaded_contributor_data and other values from CONTRIBUTORS.json
            self.load_contributors_from_json_file_cache(existing_contributor_data_file)

        # The state is captured before and after reconciliation so that the two can be compared below.
        checkpoint1 = self.checkpoint_state()
        self.reconcile_contributors_with_github_log(exclude_fork=exclude_fork)
        checkpoint2 = self.checkpoint_state()

        def list_to_dict_normalizer(*, label, item):
            # Turns a list into a dictionary mapping each element to itself. Other items are returned as-is.
            ignored(label)
            if isinstance(item, list):
                return {elem: elem for elem in item}
            else:
                return item

        if existing_contributor_data_file:
            # Discrepancies are computed only when there was a file to compare against.
            diff_manager = DiffManager(label="contributors")
            contributors1 = checkpoint1['contributors_by_name']
            contributors2 = checkpoint2['contributors_by_name']
            diffs = diff_manager.diffs(contributors1, contributors2, normalizer=list_to_dict_normalizer)
            self.cache_discrepancies = self.resummarize_discrepancies(diffs)

    @classmethod
    def resummarize_discrepancies(cls, diffs: Dict) -> Dict:
        """
        Reformats the dictionary result from DiffManager.diffs in a way that's more appropriate to our situation.
        In particular:

        * the "added" key is renamed to "to_add"
        * the "changed" key is renamed to "to_change"
        * the "removed" key is renamed to "to_remove"

        This tense change is necessary because the labels are cues to the user about actions that need to be done
        in the future, not actions already done.

        Keys whose value is missing or empty are omitted from the result.
        """
        added = diffs.get('added')
        changed = diffs.get('changed')
        removed = diffs.get('removed')
        cache_discrepancies = {}
        if added:
            cache_discrepancies['to_add'] = added
        if changed:
            cache_discrepancies['to_change'] = changed
        if removed:
            cache_discrepancies['to_remove'] = removed
        return cache_discrepancies

    def load_contributors_from_json_file_cache(self, filename):
        """
        Reads the given JSON file, remembers its parsed content as .loaded_contributor_data,
        and initializes this object's state from it via load_from_dict.
        """
        self.loaded_contributor_data = data = self.get_contributors_json_from_file_cache(filename)
        self.load_from_dict(data)

        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
            PRINT("After load_contributors_from_json_file_cache...")
            PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}")
            PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}")
            PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}")
            PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}")

    def load_from_dict(self, data: Dict):
        """
        Sets .forked_at and the four contributor indexes from the given dictionary.

        The keys read are 'forked_at' (an ISO format string), 'pre_fork_contributors_by_name' and
        'contributors_by_name'. The by-email indexes are derived from the corresponding by-name indexes.

        :raises ValueError: if the data has any of the no-longer-supported keys 'excluded_fork',
            'pre_fork_contributors_by_email' or 'contributors_by_email'.
        """
        forked_at: Optional[str] = data.get('forked_at')
        self.forked_at: Optional[datetime.datetime] = (None
                                                       if forked_at is None
                                                       else datetime.datetime.fromisoformat(forked_at))

        if 'excluded_fork' in data:
            # We originally implemented this, but it isn't needed and supporting it leads to problems. -kmp 30-Jul-2023
            raise ValueError('"excluded_fork" is no longer supported.')

        # A missing or empty entry is treated as an empty index, so the indexes set here are never None.
        pre_fork_contributors_by_name_json = data.get('pre_fork_contributors_by_name') or {}
        pre_fork_contributors_by_name = self.contributor_values_as_objects(pre_fork_contributors_by_name_json)
        self.set_keys_as_primary_names(pre_fork_contributors_by_name)
        self.pre_fork_contributors_by_name = pre_fork_contributors_by_name

        if 'pre_fork_contributors_by_email' in data:
            # We originally implemented this, but supporting it is unnecessarily complex
            # because of redundancies of implementation and the possibility of ambiguities if both
            # markers are present. -kmp 30-Jul-2023
            raise ValueError('"pre_fork_contributors_by_email" is no longer supported.')
        pre_fork_contributors_by_email = self.by_email_from_by_name(pre_fork_contributors_by_name)
        self.pre_fork_contributors_by_email = pre_fork_contributors_by_email

        contributors_by_name_json = data.get('contributors_by_name') or {}
        contributors_by_name = self.contributor_values_as_objects(contributors_by_name_json)
        self.set_keys_as_primary_names(contributors_by_name)
        self.contributors_by_name = contributors_by_name

        if 'contributors_by_email' in data:
            # We originally implemented this, but supporting it is unnecessarily complex
            # because of redundancies of implementation and the possibility of ambiguities if both
            # markers are present. -kmp 30-Jul-2023
            raise ValueError('"contributors_by_email" is no longer supported.')
        contributors_by_email = self.by_email_from_by_name(contributors_by_name)
        self.contributors_by_email = contributors_by_email

    def reconcile_contributors_with_github_log(self, exclude_fork=None):
        """
        Rummages the GitHub log entries for contributors we don't know about.
        That data is merged against our existing structures.

        On return, .contributors_by_name is indexed by primary name and .contributors_by_email
        has been rebuilt from it. If .forked_at was not already set, it is set to the date of the
        first commit (oldest first) whose author or co-author is not a pre-fork contributor.

        :param exclude_fork: the name of a repo whose contributors are to be treated as pre-fork contributors.
            This is consulted only when no contributor data was loaded from a CONTRIBUTORS.json file.
        """
        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
            PRINT("Reconciling with git log.")

        if self.loaded_contributor_data:
            pre_fork_contributor_emails = set(self.pre_fork_contributors_by_email.keys())
        elif exclude_fork:
            # The contributors of the excluded repo become this repo's pre-fork contributors.
            excluded_contributions = Contributions(repo=exclude_fork)
            self.pre_fork_contributors_by_email = excluded_contributions.contributors_by_email
            self.pre_fork_contributors_by_name = excluded_contributions.contributors_by_name
            pre_fork_contributor_emails = set(self.pre_fork_contributors_by_email.keys())
        else:
            pre_fork_contributor_emails = set()

        post_fork_contributors_seen = defaultdict(lambda: [])  # pragma: no cover - for debugging only

        contributors_by_email: ContributorIndex = self.contributors_by_email or {}
        git_repo = self.find_repo(repo_name=self.repo)

        # Counts the commits processed so far. It is read by notice_author to recognize the first commit.
        n = 0

        def notice_author(*, author: git.Actor, date: datetime.datetime):
            if self.forked_at:
                if date < self.forked_at:
                    # Commits that predate the fork are not attributed to this repo.
                    return
                    # raise Exception("Commits are out of order.")
            elif author.email not in pre_fork_contributor_emails:
                # No fork time is known yet, and this author is not a pre-fork contributor,
                # so this commit is taken to mark the creation (or forking) of this repo.
                action = "Created" if n == 1 else (f"Forked {exclude_fork} as" if exclude_fork else "Forked")
                PRINT(f"{action} {self.repo} at {date} by {author.email}")
                self.forked_at = date

            if self.forked_at and date >= self.forked_at:
                if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
                    if author.email in pre_fork_contributor_emails:  # pragma: no cover - this is for debugging
                        # PRINT(f"Post-fork contribution from {author.email} ({date})")
                        post_fork_contributors_seen[author.email].append(date)
                self.notice_reference_time(key=author.email, timestamp=date, timestamps=self.email_timestamps)
                self.notice_reference_time(key=GitAnalysis.author_name(author), timestamp=date,
                                           timestamps=self.name_timestamps)

                contributor_by_email = contributors_by_email.get(author.email)
                if contributor_by_email:  # already exists, so update it
                    contributor_by_email.notice_mention_as(email=author.email, name=GitAnalysis.author_name(author))
                else:  # need to create it new
                    contributor_by_email = Contributor.create(author=author)
                    contributors_by_email[author.email] = contributor_by_email
            else:
                # print("skipped")
                pass

        # The result of iter_commits() is reversed here so that the commits are processed in the opposite order.
        for commit in reversed(list(git_repo.iter_commits())):
            n += 1
            commit_date = commit.committed_datetime
            notice_author(author=commit.author, date=commit_date)
            for co_author in commit.co_authors:
                notice_author(author=co_author, date=commit_date)
        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
            PRINT(f"{n_of(n, 'commit')} processed.")
            for email, dates in post_fork_contributors_seen.items():
                when = str(dates[0].date())
                if len(dates) > 1:
                    when += f" to {dates[-1].date()}"
                PRINT(f"{n_of(dates, 'post-fork commit')} seen for {email} ({when}).")

        contributors_by_name: ContributorIndex = {}

        # NOTE: The body of this loop replaces values in contributors_by_email while the loop is iterating
        # over that dictionary's values, and it consults contributors_by_name as that index is being built,
        # so which object survives a merge depends on the order of these statements.
        for contributor_by_email in contributors_by_email.values():
            self.traverse(root=contributor_by_email,
                          cursor=contributor_by_email,
                          contributors_by_email=contributors_by_email,
                          contributors_by_name=contributors_by_name)
            for name in list(contributor_by_email.names):
                contributors_by_name[name] = contributor_by_email
            for email in list(contributor_by_email.emails):
                contributors_by_email[email] = contributor_by_email

        self.contributors_by_name = self.contributor_index_by_primary_name(contributors_by_name)
        self.contributors_by_email = self.by_email_from_by_name(self.contributors_by_name)  # contributors_by_email

        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
            PRINT("After reconcile_contributors_with_github_log...")
            PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}")
            PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}")
            PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}")
            PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}")

    @classmethod
    def traverse(cls,
                 root: Contributor,
                 cursor: Optional[Contributor],
                 contributors_by_email: ContributorIndex,
                 contributors_by_name: ContributorIndex,
                 seen: Optional[Set[Contributor]] = None):
        """
        Adds to the root contributor all names and emails of the cursor contributor, and then does
        the same recursively for every contributor that the given indexes associate with any of
        the cursor's names or emails.

        :param root: the contributor that accumulates the names and emails
        :param cursor: the contributor being visited. Although declared Optional, its .names and .emails
            are accessed unconditionally once the visited check passes.
        :param contributors_by_email: an index of contributors by email
        :param contributors_by_name: an index of contributors by name
        :param seen: the contributors already visited. Callers normally omit this; it is used in recursive calls.
        """
        if seen is None:
            seen = set()
        if cursor in seen:  # Already visited in this traversal, so stop here. This also keeps cycles from looping.
            return
        seen.add(cursor)
        for name in list(cursor.names):
            root.names.add(name)
        for email in list(cursor.emails):
            root.emails.add(email)
        for name in list(cursor.names):
            contributor = contributors_by_name.get(name)
            if contributor and contributor not in seen:
                cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email,
                             contributors_by_name=contributors_by_name, seen=seen)
        for email in list(cursor.emails):
            contributor = contributors_by_email.get(email)
            if contributor and contributor not in seen:  # pragma: no cover - shouldn't happen, included 'just in case'
                warnings.warn(f"Unexpected stray email seen: {email}")
                cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email,
                             contributors_by_name=contributors_by_name, seen=seen)