gh-profiler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ .venv/
2
+ __pycache__/
3
+
4
+ .DS_Store
5
+
@@ -0,0 +1 @@
1
+ 3.14
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: gh-profiler
3
+ Version: 0.1.0
4
+ Summary: Examine a GH user's profile, and help quickly decide how much to invest in their contributions.
5
+ Requires-Python: >=3.14
6
+ Description-Content-Type: text/markdown
7
+
8
+ gh-profiler
9
+ ===
10
+
11
+ Like many, I've gotten waves of open source contributions where many of the new issues and PRs aren't worth engaging with. But it takes me a bit of time to sort through each of them.
12
+
13
+ People like to say that code should "speak for itself", but I've found that looking at a GitHub user's profile has been more helpful in making a quick determination about how much time to invest in the issue or PR. I typically look at a few quick things:
14
+
15
+ - Has the person made an unusually high number of PRs lately?
16
+ - Have a significant portion of these PRs been closed without merging?
17
+ - Have they opened an excessive number of issues?
18
+ - How old is the account?
19
+ - Is there any meaningful information on their profile?
20
+
21
+ I don't make a final decision about PRs and issues based on the answers to these questions, but many times I see enough red flags here that I have a good idea not to spend much time evaluating the contribution. (I'm mostly talking about PRs and issues where there's been no prior discussion, and there's a lot of text or changes in the PR/issue to review if I'm going to take it seriously.)
22
+
23
+ The goal of this project is to get a quick snapshot of this kind of information, without having to do a bunch of clicking on GitHub. The output is a summary of what's found, with a quick visual cue as to which factors support investing time in the PR/issue, and which factors suggest it's better off being closed and ignored. I have no interest in calculating some kind of trust score, or any other single number.
24
+
25
+ Usage
26
+ ---
27
+
@@ -0,0 +1,20 @@
1
+ gh-profiler
2
+ ===
3
+
4
+ Like many, I've gotten waves of open source contributions where many of the new issues and PRs aren't worth engaging with. But it takes me a bit of time to sort through each of them.
5
+
6
+ People like to say that code should "speak for itself", but I've found that looking at a GitHub user's profile has been more helpful in making a quick determination about how much time to invest in the issue or PR. I typically look at a few quick things:
7
+
8
+ - Has the person made an unusually high number of PRs lately?
9
+ - Have a significant portion of these PRs been closed without merging?
10
+ - Have they opened an excessive number of issues?
11
+ - How old is the account?
12
+ - Is there any meaningful information on their profile?
13
+
14
+ I don't make a final decision about PRs and issues based on the answers to these questions, but many times I see enough red flags here that I have a good idea not to spend much time evaluating the contribution. (I'm mostly talking about PRs and issues where there's been no prior discussion, and there's a lot of text or changes in the PR/issue to review if I'm going to take it seriously.)
15
+
16
+ The goal of this project is to get a quick snapshot of this kind of information, without having to do a bunch of clicking on GitHub. The output is a summary of what's found, with a quick visual cue as to which factors support investing time in the PR/issue, and which factors suggest it's better off being closed and ignored. I have no interest in calculating some kind of trust score, or any other single number.
17
+
18
+ Usage
19
+ ---
20
+
@@ -0,0 +1,14 @@
1
+ [project]
2
+ name = "gh-profiler"
3
+ version = "0.1.0"
4
+ description = "Examine a GH user's profile, and help quickly decide how much to invest in their contributions."
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ dependencies = []
8
+
9
+ [project.scripts]
10
+ gh-profiler = "gh_profiler.main:main"
11
+
12
+ [build-system]
13
+ requires = ["hatchling"]
14
+ build-backend = "hatchling.build"
@@ -0,0 +1 @@
1
+ """Make gh_profiler a package."""
@@ -0,0 +1,7 @@
1
+ """Allow project to run as a module."""
2
+
3
+ from .main import main
4
+
5
+
6
+ if __name__ == "__main__":
7
+ main()
@@ -0,0 +1,52 @@
1
+ """Examine a user's profile, and highlight evidence they're human or AI.
2
+
3
+ The goal is to help make quick, evidence-based decisions about how much time
4
+ to invest in reviewing PRs, and general interaction on open source projects.
5
+
6
+ Scores:
7
+ 3: green
8
+ 2: yellow
9
+ 1: red
10
+
11
+ Package, so usage can be:
12
+ $ uvx gh-profiler ehmatthes
13
+
14
+ Or, maybe from within a project:
15
+ $ uvx gh-profiler <pr-num>
16
+
17
+ Given a PR number, it finds the author of the PR and runs the profiler on that
18
+ user?
19
+ """
20
+
21
+ import sys
22
+
23
+ from .utils.profile_data import profile_data as pdata
24
+ from .utils import profile_utils
25
+ from .utils import analysis_utils
26
+ from .utils import summary_utils
27
+
28
+
29
# Read the target username once at import, as before, but fall back to ""
# instead of raising IndexError when no argument was given. main() reports
# the missing argument with a usage message.
gh_user = sys.argv[1] if len(sys.argv) > 1 else ""
pdata.username = gh_user


def main():
    """Profile the GitHub user named on the command line and print a summary.

    Runs each collection/analysis step in order; every step reads from and
    writes to the shared ``pdata`` object.

    Raises:
        SystemExit: If no username was supplied on the command line.
    """
    if not pdata.username:
        sys.exit("Usage: gh-profiler <github-username>")

    # Get all information we'll need about the user's profile.
    profile_utils.get_profile_info()

    # How old is the account?
    analysis_utils.process_account_age()

    # How much profile information is available?
    analysis_utils.process_profile_info()

    # What does recent PR activity look like?
    profile_utils.get_pr_activity()
    analysis_utils.process_pr_activity()

    # Summarize findings.
    summary_utils.show_summary()
49
+
50
+
51
+ if __name__ == "__main__":
52
+ main()
@@ -0,0 +1 @@
1
+ """Make utils/ modules importable."""
@@ -0,0 +1,53 @@
1
+ """Utils for analyzing account information."""
2
+
3
+ from datetime import datetime as dt
4
+ from datetime import timezone as tz
5
+
6
+ from .profile_data import profile_data as pdata
7
+ from . import flags
8
+
9
+
10
def process_account_age():
    """Compute the account's age and assign the matching age flag.

    Parses the ISO 8601 ``created_at`` value from ``pdata.profile_info``,
    stores the elapsed time since then in ``pdata.account_age`` (a
    ``timedelta``), and sets ``pdata.flag_age``:

    - green: more than 3 * 365 days old
    - yellow: more than 90 days old
    - red: 90 days old or less
    """
    ts_created = dt.fromisoformat(pdata.profile_info["created_at"])
    pdata.account_age = dt.now(tz.utc) - ts_created

    if pdata.account_age.days > 3 * 365:
        pdata.flag_age = flags.green_flag
    elif pdata.account_age.days > 90:
        # Must be qualified with `flags.`; a bare `yellow_flag` is undefined
        # in this module and raised NameError for mid-aged accounts.
        pdata.flag_age = flags.yellow_flag
    else:
        pdata.flag_age = flags.red_flag
21
+
22
def process_profile_info():
    """Collect the optional profile fields and flag how many are filled in.

    Focus on: name, company, blog, location, email, bio

    The selected fields are stored in ``pdata.profile_dict``.
    ``pdata.flag_profile`` is red when none of them has a value, yellow when
    one or two do, and green when three or more do.
    """
    wanted = ("name", "company", "blog", "location", "email", "bio")
    selected = {}
    for key in wanted:
        selected[key] = pdata.profile_info[key]
    pdata.profile_dict = selected

    # A field counts as filled when it is neither None nor the empty string.
    filled = [val for val in pdata.profile_dict.values() if val not in (None, "")]

    if not filled:
        pdata.flag_profile = flags.red_flag
        return
    if len(filled) < 3:
        pdata.flag_profile = flags.yellow_flag
        return
    pdata.flag_profile = flags.green_flag
37
+
38
+
39
def process_pr_activity():
    """Evaluate recent PR activity and set the closed/merged PR flags.

    Uses ``pdata.opened_count``, ``pdata.merged_count`` and
    ``pdata.closed_count`` to compute the share of recently opened PRs that
    were merged, and the share closed without merging.

    ``pdata.flag_closed_pr`` is red above 50% closed, yellow above 15%, and
    green otherwise. ``pdata.flag_merged_pr`` is green when more than 50% were
    merged, and None otherwise.
    """
    opened = pdata.opened_count
    if opened > 0:
        ratio_merged = pdata.merged_count / opened
        ratio_closed = pdata.closed_count / opened
    else:
        # No PRs were opened in the window, so there is nothing to divide by.
        # Treat both ratios as zero rather than raising ZeroDivisionError.
        ratio_merged = ratio_closed = 0.0

    if ratio_closed > 0.5:
        pdata.flag_closed_pr = flags.red_flag
    elif ratio_closed > 0.15:
        pdata.flag_closed_pr = flags.yellow_flag
    else:
        pdata.flag_closed_pr = flags.green_flag

    pdata.flag_merged_pr = None
    if ratio_merged > 0.5:
        pdata.flag_merged_pr = flags.green_flag
@@ -0,0 +1,5 @@
1
+ """Flags used for summarizing findings."""
2
+
3
# Each flag is a single colored-circle emoji, written as a Unicode escape.
red_flag = "\U0001f534"  # U+1F534 large red circle
yellow_flag = "\U0001f7e1"  # U+1F7E1 large yellow circle
green_flag = "\U0001f7e2"  # U+1F7E2 large green circle
@@ -0,0 +1,11 @@
1
+ """Utils not really specific to GitHub."""
2
+
3
+ import shlex
4
+ import subprocess
5
+
6
+
7
def run_cmd(cmd):
    """Run a subprocess command, return stdout.

    Args:
        cmd: Command line as a single string. It is split into arguments with
            ``shlex.split`` and executed directly, without a shell.

    Returns:
        The command's standard output, decoded to ``str``.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The message
            includes the exit status, the command, and the captured stderr.
    """
    cmd_parts = shlex.split(cmd)
    output_obj = subprocess.run(cmd_parts, capture_output=True)

    # Fail here, with the command's own error text, rather than handing empty
    # or error output to callers that go on to parse it.
    if output_obj.returncode != 0:
        stderr = output_obj.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"Command failed with exit status {output_obj.returncode}: {cmd}\n{stderr}"
        )

    return output_obj.stdout.decode()
@@ -0,0 +1,23 @@
1
+ """One place to store all data about the user."""
2
+
3
from dataclasses import dataclass
from datetime import timedelta
4
+
5
+
6
+ @dataclass
7
+ class ProfileData:
8
+ username: str = ""
9
+
10
+ profile_info: dict | None = None
11
+
12
+ account_age: int = 0
13
+
14
+ opened_count: int = 0
15
+ merged_count: int = 0
16
+ closed_count: int = 0
17
+
18
+ flag_age: str = ""
19
+ flag_merged_pr: str = ""
20
+ flag_closed_pr: str = ""
21
+
22
+
23
+ profile_data = ProfileData()
@@ -0,0 +1,33 @@
1
+ """Utils for retrieving user information."""
2
+
3
+ import json
4
+ from datetime import datetime as dt
5
+ from datetime import timezone as tz
6
+ from datetime import timedelta
7
+ from urllib.parse import quote
8
+
9
+ from .profile_data import profile_data as pdata
10
+ from . import infra_utils
11
+
12
+
13
def get_profile_info():
    """Fetch the user's profile with `gh api` and store it on pdata.

    The jq filter keeps only the fields used later (login, name, created_at,
    company, blog, location, email, bio). The parsed JSON object is stored in
    ``pdata.profile_info``.
    """
    jq_filter = "{login, name, created_at, company, blog, location, email, bio}"
    raw_json = infra_utils.run_cmd(
        f"gh api users/{pdata.username} --jq '{jq_filter}'"
    )
    pdata.profile_info = json.loads(raw_json)
18
+
19
+
20
def get_pr_activity():
    """Count the user's PRs from the last 21 days via the search API.

    Three searches are run through `gh api`, in this order, and their
    ``total_count`` values are stored on pdata:

    - ``opened_count``: every PR the user created in the window
    - ``merged_count``: those that were merged
    - ``closed_count``: those that were closed without being merged
    """
    window_start = dt.now(tz.utc) - timedelta(days=21)
    base_query = (
        f"author:{pdata.username} is:pr created:>={window_start.date().isoformat()}"
    )

    def count_matching(extra=""):
        # URL-encode the whole query, then read back the single integer.
        encoded = quote(base_query + extra)
        cmd = f'gh api "search/issues?q={encoded}" --jq .total_count'
        return int(infra_utils.run_cmd(cmd).strip())

    pdata.opened_count = count_matching()
    pdata.merged_count = count_matching(" is:merged")
    pdata.closed_count = count_matching(" is:closed -is:merged")
@@ -0,0 +1,64 @@
1
+ """Utils for summarizing findings."""
2
+
3
+ from .profile_data import profile_data as pdata
4
+ from . import flags
5
+
6
+
7
def show_summary():
    """Show a concise summary of what was found.

    Prints three sections from the values already stored on pdata: the
    username with account age, the profile information (or a note that none
    was provided), and recent PR activity. Each line starts with the flag
    assigned to that factor.
    """
    # Username, account age:
    print(f"\nGitHub user: {pdata.username}")
    print(f" {pdata.flag_age} Account age: {pdata.account_age.days} days")

    # Available profile information:
    if pdata.flag_profile == flags.red_flag:
        # A red profile flag is reported as a single line instead of listing
        # the individual fields.
        print(f"\n {pdata.flag_profile} No profile information has been provided.")
    else:
        _show_profile_dict()

    # Recent PR activity:
    print()
    if pdata.opened_count >= 10:
        # Only show merged if it's a good sign.
        if pdata.flag_merged_pr == flags.green_flag:
            print(
                f" {pdata.flag_merged_pr} {pdata.merged_count} of {pdata.opened_count} PRs have been merged in the last 21 days."
            )
        print(
            f" {pdata.flag_closed_pr} {pdata.closed_count} of {pdata.opened_count} PRs have been closed without merging in the last 21 days."
        )
    else:
        # Fewer than 10 recent PRs is reported with a green flag; no ratios
        # are shown in this case.
        print(
            f" {flags.green_flag} {pdata.username} has opened fewer than 10 PRs in the last 21 days."
        )
    print("")
35
+
36
+
37
+ # --- Helper functions ---
38
+
39
def _show_profile_dict():
    """Summarize information from the user's profile dict.

    Prints a header line carrying the profile flag, then one entry per key in
    ``pdata.profile_dict``. Keys without a value are still printed, with
    nothing after the colon.
    """
    print(f"\n {pdata.flag_profile} Profile information:")

    for k, v in pdata.profile_dict.items():
        if v and k != "bio":
            print(f" {k}: {v}")
        elif k == "bio":
            # The bio is always routed through _show_bio(), filled in or not.
            _show_bio(v)
        else:
            print(f" {k}:")
50
+
51
def _show_bio(bio):
    """Show a bio appropriately.

    Prints a bare "bio:" label when the bio is None or empty, a single
    "bio: <text>" line when it contains no newline, and otherwise the label
    followed by one printed line per line of the bio.
    """
    if bio in (None, ""):
        print(f" bio:")
        return

    if bio.count("\n") == 0:
        print(f" bio: {bio}")
        return

    # Print a multi-line bio.
    print(" bio:")
    for line in bio.splitlines():
        print(f" {line}")
@@ -0,0 +1,8 @@
1
+ version = 1
2
+ revision = 3
3
+ requires-python = ">=3.14"
4
+
5
+ [[package]]
6
+ name = "gh-profiler"
7
+ version = "0.1.0"
8
+ source = { editable = "." }