prfiesta 0.2.1b228__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prfiesta-0.2.1b228/PKG-INFO +263 -0
- prfiesta-0.2.1b228/README.md +233 -0
- prfiesta-0.2.1b228/prfiesta/__init__.py +17 -0
- prfiesta-0.2.1b228/prfiesta/__main__.py +100 -0
- prfiesta-0.2.1b228/prfiesta/analysis/__init__.py +0 -0
- prfiesta-0.2.1b228/prfiesta/analysis/plot.py +128 -0
- prfiesta-0.2.1b228/prfiesta/analysis/view.py +34 -0
- prfiesta-0.2.1b228/prfiesta/collectors/__init__.py +0 -0
- prfiesta-0.2.1b228/prfiesta/collectors/github.py +169 -0
- prfiesta-0.2.1b228/prfiesta/environment.py +17 -0
- prfiesta-0.2.1b228/prfiesta/output.py +57 -0
- prfiesta-0.2.1b228/prfiesta/spinner.py +11 -0
- prfiesta-0.2.1b228/pyproject.toml +125 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: prfiesta
|
|
3
|
+
Version: 0.2.1b228
|
|
4
|
+
Summary: Collect and Analyze Individual Contributor Pull Requests
|
|
5
|
+
Keywords: pull-request,pull-request-review,performance-review
|
|
6
|
+
Author: kiran94
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Utilities
|
|
11
|
+
Classifier: Topic :: Software Development
|
|
12
|
+
Classifier: Topic :: Software Development :: Version Control :: Git
|
|
13
|
+
Requires-Dist: pygithub>=1.58.1,<2.0.0
|
|
14
|
+
Requires-Dist: click>=8.1.3,<9.0.0
|
|
15
|
+
Requires-Dist: cloup>=2.1.0,<3.0.0
|
|
16
|
+
Requires-Dist: pandas>=2.0.1,<3.0.0
|
|
17
|
+
Requires-Dist: pyarrow>=18.1.0,<19.0.0
|
|
18
|
+
Requires-Dist: rich>=13.3.5,<14.0.0
|
|
19
|
+
Requires-Dist: seaborn>=0.12.2,<0.13.0
|
|
20
|
+
Requires-Dist: matplotlib>=3.7.1,<4.0.0
|
|
21
|
+
Requires-Dist: natural>=0.2.0,<0.3.0
|
|
22
|
+
Requires-Dist: urllib3<2
|
|
23
|
+
Requires-Dist: duckdb>=1.1.3,<2.0.0
|
|
24
|
+
Requires-Dist: numpy<2
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Project-URL: Homepage, https://pypi.org/project/prfiesta/
|
|
27
|
+
Project-URL: Repository, https://github.com/kiran94/prfiesta/pull/67
|
|
28
|
+
Project-URL: Documentation, https://github.com/kiran94/prfiesta/blob/main/README.md
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# prfiesta 🦜🥳
|
|
32
|
+
|
|
33
|
+
[](https://github.com/kiran94/prfiesta/actions/workflows/main.yml)  [](https://pypi.org/project/prfiesta/)
|
|
34
|
+
|
|
35
|
+
> Collect and Analyze Individual Contributor Pull Requests
|
|
36
|
+
|
|
37
|
+
`prfiesta` allows you to collect, analyze and celebrate pull requests made by an individual 🎉.
|
|
38
|
+
|
|
39
|
+
It can be used by engineers or managers to gain insights into all the great work the contributor has made over a specified period of time. A great use case of this tool is during a performance review process when you want to perform analysis on all the contributions made over the year.
|
|
40
|
+
|
|
41
|
+
[](https://asciinema.org/a/587987)
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
python -m pip install prfiesta
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
> [!TIP]
|
|
50
|
+
> If you are using [`uvx`](https://docs.astral.sh/uv/guides/tools/) then you can also just run this:
|
|
51
|
+
> ```bash
|
|
52
|
+
> uvx prfiesta --help
|
|
53
|
+
> ```
|
|
54
|
+
|
|
55
|
+
Dependencies:
|
|
56
|
+
|
|
57
|
+
- Python 3.9+
|
|
58
|
+
|
|
59
|
+
## Usage
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# Authenticate yourself
|
|
63
|
+
export GITHUB_TOKEN=... # or GITHUB_ENTERPRISE_TOKEN
|
|
64
|
+
|
|
65
|
+
# Get all pull requests for a user
|
|
66
|
+
prfiesta -u kiran94
|
|
67
|
+
|
|
68
|
+
# Get all pull requests for a user created after a date
|
|
69
|
+
prfiesta -u kiran94 --after 2023-01-01
|
|
70
|
+
|
|
71
|
+
# Get all pull requests for a user created between two dates
|
|
72
|
+
prfiesta -u kiran94 --after 2023-01-01 --before 2023-06-01
|
|
73
|
+
|
|
74
|
+
# Get all pull requests for a user updated after a date
|
|
75
|
+
prfiesta -u kiran94 --after 2023-01-01 --use-updated
|
|
76
|
+
|
|
77
|
+
# Get all pull requests with a custom output file name
|
|
78
|
+
prfiesta -u kiran94 --output my_pull_requests.csv
|
|
79
|
+
|
|
80
|
+
# Get all pull requests in parquet format with a custom file name
|
|
81
|
+
prfiesta -u kiran94 --output-type parquet --output my_pull_requests.parquet
|
|
82
|
+
|
|
83
|
+
# Get all pull requests and export to a duckdb database
|
|
84
|
+
prfiesta -u kiran94 --output-type duckdb --output mydatabase.duckdb
|
|
85
|
+
|
|
86
|
+
# Get all pull requests for more than one user
|
|
87
|
+
prfiesta -u kiran94 -u user2
|
|
88
|
+
|
|
89
|
+
# Get all pull requests and drop specific columns from the output
|
|
90
|
+
prfiesta -u kiran94 -dc events_url -dc comments_url -dc node_id
|
|
91
|
+
|
|
92
|
+
# Get all pull requests where the user was involved (as opposed to just authored)
|
|
93
|
+
prfiesta -u kiran94 --use-involves
|
|
94
|
+
|
|
95
|
+
# Get all pull requests where the user reviewed it rather than being the author
|
|
96
|
+
prfiesta -u charliermarsh --after 2023-05-01 --use-reviewed-by
|
|
97
|
+
|
|
98
|
+
# Get all pull requests where the user was requested a review rather than being the author
|
|
99
|
+
prfiesta -u charliermarsh --after 2023-05-01 --use-review-requested
|
|
100
|
+
|
|
101
|
+
# Get all pull requests which contain a reference (e.g. JIRA card reference) within the PR title or body
|
|
102
|
+
prfiesta --reference PA-12765
|
|
103
|
+
|
|
104
|
+
# Get help
|
|
105
|
+
prfiesta --help
|
|
106
|
+
|
|
107
|
+
# Show the current version
|
|
108
|
+
prfiesta --version
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
You can also leverage `prfiesta` directly in your own application:
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
import pandas as pd
|
|
115
|
+
|
|
116
|
+
from datetime import datetime
|
|
117
|
+
from prfiesta.collectors.github import GitHubCollector
|
|
118
|
+
|
|
119
|
+
github = GitHubCollector()
|
|
120
|
+
frame: pd.DataFrame = github.collect('kiran94', 'user2', after=datetime(2023, 1, 1))
|
|
121
|
+
|
|
122
|
+
print(frame)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Output
|
|
126
|
+
|
|
127
|
+
You can control the output type using the `--output-type` option. Supported options:
|
|
128
|
+
|
|
129
|
+
- `csv` (default)
|
|
130
|
+
- `parquet`
|
|
131
|
+
- [`duckdb`](https://duckdb.org/)
|
|
132
|
+
|
|
133
|
+
You can also customize the output file name using the `--output` option. When using `duckdb`, this argument is the duckdb database that we should export into. You can see an example of a duckdb workflow [here](https://github.com/kiran94/prfiesta/blob/main/notebooks/misc/duckdb_integration.ipynb).
|
|
134
|
+
|
|
135
|
+
### User Filter
|
|
136
|
+
|
|
137
|
+
By default, `prfiesta` will take the users provided in the `--users` option and search the Git provider for any pull requests that the user **authored**. Within more collaborative environments, this may not be what you want as you may want to also gain some visibility into all secondary contributions a user made (e.g. commenting on others' pull requests).
|
|
138
|
+
|
|
139
|
+
*The options listed here are mutually exclusive.*
|
|
140
|
+
|
|
141
|
+
#### User Involvement
|
|
142
|
+
|
|
143
|
+
`prfiesta` exposes the `--use-involves` flag which will search for pull requests that were:
|
|
144
|
+
|
|
145
|
+
- Created by a certain user
|
|
146
|
+
- Assigned to that user
|
|
147
|
+
- Mentioning that user
|
|
148
|
+
- Commented on by that user
|
|
149
|
+
|
|
150
|
+
Learn more about `involves` [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-a-user-thats-involved-in-an-issue-or-pull-request).
|
|
151
|
+
|
|
152
|
+
#### User Reviewed
|
|
153
|
+
|
|
154
|
+
`prfiesta` exposes a `--use-reviewed-by` flag which will collect pull requests where the user *has reviewed* others' pull requests.
|
|
155
|
+
|
|
156
|
+
Learn more about searching review requests [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-pull-request-review-status-and-reviewer)
|
|
157
|
+
|
|
158
|
+
#### User Requested Review
|
|
159
|
+
|
|
160
|
+
`prfiesta` exposes a `--use-review-requested` flag which will collect pull requests where the user was *requested* a review from other collaborators.
|
|
161
|
+
|
|
162
|
+
Learn more about searching review requests [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-pull-request-review-status-and-reviewer)
|
|
163
|
+
|
|
164
|
+
### Date Filter
|
|
165
|
+
|
|
166
|
+
When using the `--after` and `--before` date filters, by default `prfiesta` will use the `created` date dimension with these filters on the Git provider (e.g GitHub). This may not fit your use case and you may want to filter on when a pull request was `updated` instead. To do this you can use the `--use-updated` flag.
|
|
167
|
+
|
|
168
|
+
Learn more about date filters [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-when-an-issue-or-pull-request-was-created-or-last-updated).
|
|
169
|
+
|
|
170
|
+
### Reference Search
|
|
171
|
+
|
|
172
|
+
You may come across a use case where you want to filter pull requests on a specific reference. For example, it may be a team practice to put a JIRA card reference within the pull request title or body.
|
|
173
|
+
|
|
174
|
+
For this you can use the `--reference` filter.
|
|
175
|
+
|
|
176
|
+
> [!NOTE]
|
|
177
|
+
> Results from reference search are entirely up to the GitHub Search API. On some occasions, it may provide inaccurate results.
|
|
178
|
+
|
|
179
|
+
## Analysis
|
|
180
|
+
|
|
181
|
+
`prfiesta` ships with built-in plots to help analyze your pull request data. These serve as a starting point in your analysis. See more information on the built-in plots and views [here](https://github.com/kiran94/prfiesta/blob/main/docs/analysis.md).
|
|
182
|
+
|
|
183
|
+
## Using GitHub Enterprise
|
|
184
|
+
|
|
185
|
+
If you are trying to fetch data from a [GitHub Enterprise](https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin?apiVersion=2022-11-28) server, then much of the functionality should work the same way. You just need to make sure that:
|
|
186
|
+
|
|
187
|
+
- `GH_HOST` is set to your enterprise instance's API URL. Reach out to your internal GitHub team if you are not sure what this should be.
|
|
188
|
+
- `GITHUB_ENTERPRISE_TOKEN` is set to a [personal access token](https://docs.github.com/en/enterprise-cloud@latest/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) generated on your GitHub Enterprise instance.
|
|
189
|
+
|
|
190
|
+
## GitHub Rate Limiting
|
|
191
|
+
|
|
192
|
+
Depending on your input parameters, you may end up in a situation where you are being [Rate Limited](https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#rate-limiting) by the GitHub API.
|
|
193
|
+
|
|
194
|
+
See this [Notebook](https://github.com/kiran94/prfiesta/blob/main/notebooks/misc/rate_limit.ipynb) on a way to handle this.
|
|
195
|
+
|
|
196
|
+
## Environment Variables
|
|
197
|
+
|
|
198
|
+
| Variable | Description | Default |
|
|
199
|
+
| --------------- | --------------- | ------ |
|
|
200
|
+
| `GITHUB_TOKEN` | The Github [`Token`](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) to use. Must have the `repo` scope. | |
|
|
201
|
+
| `GITHUB_ENTERPRISE_TOKEN` | Takes precedence over `GITHUB_TOKEN` when set | |
|
|
202
|
+
| `GH_HOST` | The Github Host to communicate with (Override this with your company's GitHub Enterprise server if needed) | `https://api.github.com` |
|
|
203
|
+
| `LOGGING_LEVEL` | The [Logging Level](https://docs.python.org/3/library/logging.html#logging-levels) to use | `INFO` |
|
|
204
|
+
| `LOGGING_FORMAT` | The [Logging Format](https://docs.python.org/3/library/logging.html#logrecord-attributes) to use | `%(message)s` |
|
|
205
|
+
| `SPINNER_STYLE` | The [Spinner Style](https://rich.readthedocs.io/en/stable/reference/spinner.html) to use | `blue` |
|
|
206
|
+
|
|
207
|
+
## Developer Setup
|
|
208
|
+
|
|
209
|
+
Assuming you have cloned the repository and are at the root of the repository in your terminal.
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
uv sync
|
|
213
|
+
uv build
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
This should leave you in a state where all dependencies are installed and `prfiesta` is available through `uv run`.
|
|
217
|
+
|
|
218
|
+
You can then leverage the various commands in the [makefile](https://github.com/kiran94/prfiesta/blob/main/makefile) for development tasks:
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
# Run all unit tests
|
|
222
|
+
make test
|
|
223
|
+
|
|
224
|
+
# Produce code coverage reports
|
|
225
|
+
make coverage
|
|
226
|
+
|
|
227
|
+
# Code linting
|
|
228
|
+
make lint
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Optionally you can also install [pre-commit](https://github.com/pre-commit/pre-commit) to run some sanity checks before your commits.
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Install it into your git hooks (one time setup)
|
|
235
|
+
# from this point onwards, any commits will run pre-commit checks
|
|
236
|
+
precommit_install
|
|
237
|
+
|
|
238
|
+
# If you want to run all checks on all files without committing.
|
|
239
|
+
precommit_run
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Creating Prereleases
|
|
243
|
+
|
|
244
|
+
When you create a pull request on this repository, various CI checks are run. Towards the end of those checks there is a `release` job.
|
|
245
|
+
|
|
246
|
+
Usually when running under `main`, this job is responsible for publishing new versions to pypi. However when running under a pull request, this will create a special prerelease package specific to that pull request.
|
|
247
|
+
|
|
248
|
+
The versioning of this package follows [PEP-440](https://peps.python.org/pep-0440/#pre-releases) and will look something like this:
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
0.8.1b125
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Where
|
|
255
|
+
- `0.8.1` = The bumped version of what is currently within the `pyproject.toml` of that pull request. We don't attempt to do any analysis to figure out if we should be bumping with a higher severity in this context.
|
|
256
|
+
- `b` = Beta; Indicates to pypi that this is a prerelease package.
|
|
257
|
+
- `125` = The `github.run_number` from [GitHub Actions](https://docs.github.com/en/actions/learn-github-actions/contexts#github-context).
|
|
258
|
+
|
|
259
|
+
An example prerelease package looks like this: https://pypi.org/project/prfiesta/0.8.1b125/
|
|
260
|
+
|
|
261
|
+
Downstream users can then do a full end to end test with the prerelease package before the change is merged into `main`. This will automatically be posted into the pull request [example](https://github.com/kiran94/prfiesta/pull/36#issuecomment-1564909558).
|
|
262
|
+
|
|
263
|
+
You can find the full version history of the package [here](https://pypi.org/project/prfiesta/#history).
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# prfiesta 🦜🥳
|
|
2
|
+
|
|
3
|
+
[](https://github.com/kiran94/prfiesta/actions/workflows/main.yml)  [](https://pypi.org/project/prfiesta/)
|
|
4
|
+
|
|
5
|
+
> Collect and Analyze Individual Contributor Pull Requests
|
|
6
|
+
|
|
7
|
+
`prfiesta` allows you to collect, analyze and celebrate pull requests made by an individual 🎉.
|
|
8
|
+
|
|
9
|
+
It can be used by engineers or managers to gain insights into all the great work the contributor has made over a specified period of time. A great use case of this tool is during a performance review process when you want to perform analysis on all the contributions made over the year.
|
|
10
|
+
|
|
11
|
+
[](https://asciinema.org/a/587987)
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
python -m pip install prfiesta
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
> [!TIP]
|
|
20
|
+
> If you are using [`uvx`](https://docs.astral.sh/uv/guides/tools/) then you can also just run this:
|
|
21
|
+
> ```bash
|
|
22
|
+
> uvx prfiesta --help
|
|
23
|
+
> ```
|
|
24
|
+
|
|
25
|
+
Dependencies:
|
|
26
|
+
|
|
27
|
+
- Python 3.9+
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Authenticate yourself
|
|
33
|
+
export GITHUB_TOKEN=... # or GITHUB_ENTERPRISE_TOKEN
|
|
34
|
+
|
|
35
|
+
# Get all pull requests for a user
|
|
36
|
+
prfiesta -u kiran94
|
|
37
|
+
|
|
38
|
+
# Get all pull requests for a user created after a date
|
|
39
|
+
prfiesta -u kiran94 --after 2023-01-01
|
|
40
|
+
|
|
41
|
+
# Get all pull requests for a user created between two dates
|
|
42
|
+
prfiesta -u kiran94 --after 2023-01-01 --before 2023-06-01
|
|
43
|
+
|
|
44
|
+
# Get all pull requests for a user updated after a date
|
|
45
|
+
prfiesta -u kiran94 --after 2023-01-01 --use-updated
|
|
46
|
+
|
|
47
|
+
# Get all pull requests with a custom output file name
|
|
48
|
+
prfiesta -u kiran94 --output my_pull_requests.csv
|
|
49
|
+
|
|
50
|
+
# Get all pull requests in parquet format with a custom file name
|
|
51
|
+
prfiesta -u kiran94 --output-type parquet --output my_pull_requests.parquet
|
|
52
|
+
|
|
53
|
+
# Get all pull requests and export to a duckdb database
|
|
54
|
+
prfiesta -u kiran94 --output-type duckdb --output mydatabase.duckdb
|
|
55
|
+
|
|
56
|
+
# Get all pull requests for more than one user
|
|
57
|
+
prfiesta -u kiran94 -u user2
|
|
58
|
+
|
|
59
|
+
# Get all pull requests and drop specific columns from the output
|
|
60
|
+
prfiesta -u kiran94 -dc events_url -dc comments_url -dc node_id
|
|
61
|
+
|
|
62
|
+
# Get all pull requests where the user was involved (as opposed to just authored)
|
|
63
|
+
prfiesta -u kiran94 --use-involves
|
|
64
|
+
|
|
65
|
+
# Get all pull requests where the user reviewed it rather than being the author
|
|
66
|
+
prfiesta -u charliermarsh --after 2023-05-01 --use-reviewed-by
|
|
67
|
+
|
|
68
|
+
# Get all pull requests where the user was requested a review rather than being the author
|
|
69
|
+
prfiesta -u charliermarsh --after 2023-05-01 --use-review-requested
|
|
70
|
+
|
|
71
|
+
# Get all pull requests which contain a reference (e.g. JIRA card reference) within the PR title or body
|
|
72
|
+
prfiesta --reference PA-12765
|
|
73
|
+
|
|
74
|
+
# Get help
|
|
75
|
+
prfiesta --help
|
|
76
|
+
|
|
77
|
+
# Show the current version
|
|
78
|
+
prfiesta --version
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
You can also leverage `prfiesta` directly in your own application:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import pandas as pd
|
|
85
|
+
|
|
86
|
+
from datetime import datetime
|
|
87
|
+
from prfiesta.collectors.github import GitHubCollector
|
|
88
|
+
|
|
89
|
+
github = GitHubCollector()
|
|
90
|
+
frame: pd.DataFrame = github.collect('kiran94', 'user2', after=datetime(2023, 1, 1))
|
|
91
|
+
|
|
92
|
+
print(frame)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Output
|
|
96
|
+
|
|
97
|
+
You can control the output type using the `--output-type` option. Supported options:
|
|
98
|
+
|
|
99
|
+
- `csv` (default)
|
|
100
|
+
- `parquet`
|
|
101
|
+
- [`duckdb`](https://duckdb.org/)
|
|
102
|
+
|
|
103
|
+
You can also customize the output file name using the `--output` option. When using `duckdb`, this argument is the duckdb database that we should export into. You can see an example of a duckdb workflow [here](https://github.com/kiran94/prfiesta/blob/main/notebooks/misc/duckdb_integration.ipynb).
|
|
104
|
+
|
|
105
|
+
### User Filter
|
|
106
|
+
|
|
107
|
+
By default, `prfiesta` will take the users provided in the `--users` option and search the Git provider for any pull requests that the user **authored**. Within more collaborative environments, this may not be what you want as you may want to also gain some visibility into all secondary contributions a user made (e.g. commenting on others' pull requests).
|
|
108
|
+
|
|
109
|
+
*The options listed here are mutually exclusive.*
|
|
110
|
+
|
|
111
|
+
#### User Involvement
|
|
112
|
+
|
|
113
|
+
`prfiesta` exposes the `--use-involves` flag which will search for pull requests that were:
|
|
114
|
+
|
|
115
|
+
- Created by a certain user
|
|
116
|
+
- Assigned to that user
|
|
117
|
+
- Mentioning that user
|
|
118
|
+
- Commented on by that user
|
|
119
|
+
|
|
120
|
+
Learn more about `involves` [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-a-user-thats-involved-in-an-issue-or-pull-request).
|
|
121
|
+
|
|
122
|
+
#### User Reviewed
|
|
123
|
+
|
|
124
|
+
`prfiesta` exposes a `--use-reviewed-by` flag which will collect pull requests where the user *has reviewed* others' pull requests.
|
|
125
|
+
|
|
126
|
+
Learn more about searching review requests [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-pull-request-review-status-and-reviewer)
|
|
127
|
+
|
|
128
|
+
#### User Requested Review
|
|
129
|
+
|
|
130
|
+
`prfiesta` exposes a `--use-review-requested` flag which will collect pull requests where the user was *requested* a review from other collaborators.
|
|
131
|
+
|
|
132
|
+
Learn more about searching review requests [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-pull-request-review-status-and-reviewer)
|
|
133
|
+
|
|
134
|
+
### Date Filter
|
|
135
|
+
|
|
136
|
+
When using the `--after` and `--before` date filters, by default `prfiesta` will use the `created` date dimension with these filters on the Git provider (e.g GitHub). This may not fit your use case and you may want to filter on when a pull request was `updated` instead. To do this you can use the `--use-updated` flag.
|
|
137
|
+
|
|
138
|
+
Learn more about date filters [here](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests#search-by-when-an-issue-or-pull-request-was-created-or-last-updated).
|
|
139
|
+
|
|
140
|
+
### Reference Search
|
|
141
|
+
|
|
142
|
+
You may come across a use case where you want to filter pull requests on a specific reference. For example, it may be a team practice to put a JIRA card reference within the pull request title or body.
|
|
143
|
+
|
|
144
|
+
For this you can use the `--reference` filter.
|
|
145
|
+
|
|
146
|
+
> [!NOTE]
|
|
147
|
+
> Results from reference search are entirely up to the GitHub Search API. On some occasions, it may provide inaccurate results.
|
|
148
|
+
|
|
149
|
+
## Analysis
|
|
150
|
+
|
|
151
|
+
`prfiesta` ships with built-in plots to help analyze your pull request data. These serve as a starting point in your analysis. See more information on the built-in plots and views [here](https://github.com/kiran94/prfiesta/blob/main/docs/analysis.md).
|
|
152
|
+
|
|
153
|
+
## Using GitHub Enterprise
|
|
154
|
+
|
|
155
|
+
If you are trying to fetch data from a [GitHub Enterprise](https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin?apiVersion=2022-11-28) server, then much of the functionality should work the same way. You just need to make sure that:
|
|
156
|
+
|
|
157
|
+
- `GH_HOST` is set to your enterprise instance's API URL. Reach out to your internal GitHub team if you are not sure what this should be.
|
|
158
|
+
- `GITHUB_ENTERPRISE_TOKEN` is set to a [personal access token](https://docs.github.com/en/enterprise-cloud@latest/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) generated on your GitHub Enterprise instance.
|
|
159
|
+
|
|
160
|
+
## GitHub Rate Limiting
|
|
161
|
+
|
|
162
|
+
Depending on your input parameters, you may end up in a situation where you are being [Rate Limited](https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#rate-limiting) by the GitHub API.
|
|
163
|
+
|
|
164
|
+
See this [Notebook](https://github.com/kiran94/prfiesta/blob/main/notebooks/misc/rate_limit.ipynb) on a way to handle this.
|
|
165
|
+
|
|
166
|
+
## Environment Variables
|
|
167
|
+
|
|
168
|
+
| Variable | Description | Default |
|
|
169
|
+
| --------------- | --------------- | ------ |
|
|
170
|
+
| `GITHUB_TOKEN` | The Github [`Token`](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) to use. Must have the `repo` scope. | |
|
|
171
|
+
| `GITHUB_ENTERPRISE_TOKEN` | Takes precedence over `GITHUB_TOKEN` when set | |
|
|
172
|
+
| `GH_HOST` | The Github Host to communicate with (Override this with your company's GitHub Enterprise server if needed) | `https://api.github.com` |
|
|
173
|
+
| `LOGGING_LEVEL` | The [Logging Level](https://docs.python.org/3/library/logging.html#logging-levels) to use | `INFO` |
|
|
174
|
+
| `LOGGING_FORMAT` | The [Logging Format](https://docs.python.org/3/library/logging.html#logrecord-attributes) to use | `%(message)s` |
|
|
175
|
+
| `SPINNER_STYLE` | The [Spinner Style](https://rich.readthedocs.io/en/stable/reference/spinner.html) to use | `blue` |
|
|
176
|
+
|
|
177
|
+
## Developer Setup
|
|
178
|
+
|
|
179
|
+
Assuming you have cloned the repository and are at the root of the repository in your terminal.
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
uv sync
|
|
183
|
+
uv build
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
This should leave you in a state where all dependencies are installed and `prfiesta` is available through `uv run`.
|
|
187
|
+
|
|
188
|
+
You can then leverage the various commands in the [makefile](https://github.com/kiran94/prfiesta/blob/main/makefile) for development tasks:
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# Run all unit tests
|
|
192
|
+
make test
|
|
193
|
+
|
|
194
|
+
# Produce code coverage reports
|
|
195
|
+
make coverage
|
|
196
|
+
|
|
197
|
+
# Code linting
|
|
198
|
+
make lint
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Optionally you can also install [pre-commit](https://github.com/pre-commit/pre-commit) to run some sanity checks before your commits.
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Install it into your git hooks (one time setup)
|
|
205
|
+
# from this point onwards, any commits will run pre-commit checks
|
|
206
|
+
precommit_install
|
|
207
|
+
|
|
208
|
+
# If you want to run all checks on all files without committing.
|
|
209
|
+
precommit_run
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Creating Prereleases
|
|
213
|
+
|
|
214
|
+
When you create a pull request on this repository, various CI checks are run. Towards the end of those checks there is a `release` job.
|
|
215
|
+
|
|
216
|
+
Usually when running under `main`, this job is responsible for publishing new versions to pypi. However when running under a pull request, this will create a special prerelease package specific to that pull request.
|
|
217
|
+
|
|
218
|
+
The versioning of this package follows [PEP-440](https://peps.python.org/pep-0440/#pre-releases) and will look something like this:
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
0.8.1b125
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Where
|
|
225
|
+
- `0.8.1` = The bumped version of what is currently within the `pyproject.toml` of that pull request. We don't attempt to do any analysis to figure out if we should be bumping with a higher severity in this context.
|
|
226
|
+
- `b` = Beta; Indicates to pypi that this is a prerelease package.
|
|
227
|
+
- `125` = The `github.run_number` from [GitHub Actions](https://docs.github.com/en/actions/learn-github-actions/contexts#github-context).
|
|
228
|
+
|
|
229
|
+
An example prerelease package looks like this: https://pypi.org/project/prfiesta/0.8.1b125/
|
|
230
|
+
|
|
231
|
+
Downstream users can then do a full end to end test with the prerelease package before the change is merged into `main`. This will automatically be posted into the pull request [example](https://github.com/kiran94/prfiesta/pull/36#issuecomment-1564909558).
|
|
232
|
+
|
|
233
|
+
You can find the full version history of the package [here](https://pypi.org/project/prfiesta/#history).
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import importlib.metadata
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from rich.logging import RichHandler
|
|
6
|
+
|
|
7
|
+
__version__ = importlib.metadata.version(__name__)
|
|
8
|
+
|
|
9
|
+
LOGGING_LEVEL = os.environ.get("LOGGING_LEVEL", logging.INFO)
|
|
10
|
+
LOGGING_FORMAT = os.environ.get("LOGGING_FORMAT", "%(message)s")
|
|
11
|
+
SPINNER_STYLE = os.environ.get("SPINNER_STYLE", "blue")
|
|
12
|
+
|
|
13
|
+
logging.basicConfig(
|
|
14
|
+
level=LOGGING_LEVEL,
|
|
15
|
+
format=LOGGING_FORMAT,
|
|
16
|
+
handlers=[RichHandler(markup=True, show_path=False, show_time=False, show_level=True)],
|
|
17
|
+
)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
import cloup
|
|
6
|
+
from rich.live import Live
|
|
7
|
+
from rich.spinner import Spinner
|
|
8
|
+
from rich.text import Text
|
|
9
|
+
|
|
10
|
+
from prfiesta import SPINNER_STYLE, __version__
|
|
11
|
+
from prfiesta.collectors.github import GitHubCollector
|
|
12
|
+
from prfiesta.environment import GitHubEnvironment
|
|
13
|
+
from prfiesta.output import output_frame
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
github_environment = GitHubEnvironment()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@cloup.command()
|
|
21
|
+
@cloup.option_group(
|
|
22
|
+
"general options",
|
|
23
|
+
cloup.option("-u", "--users", multiple=True, help="The GitHub Users to search for. Can be multiple"),
|
|
24
|
+
cloup.option("--reference", help="A external ticket reference to search for e.g. JIRA-1234"),
|
|
25
|
+
)
|
|
26
|
+
@cloup.option_group(
|
|
27
|
+
"date filter options",
|
|
28
|
+
cloup.option("-a", "--after", type=click.DateTime(formats=["%Y-%m-%d"]), help="Only search for pull requests after this date e.g 2023-01-01"),
|
|
29
|
+
cloup.option("-b", "--before", type=click.DateTime(formats=["%Y-%m-%d"]), help="Only search for pull requests before this date e.g 2023-04-30"),
|
|
30
|
+
cloup.option("-d", "--use-updated", is_flag=True, default=False, help="filter on when the pr was last updated rather then created"),
|
|
31
|
+
)
|
|
32
|
+
@cloup.option_group(
|
|
33
|
+
"user filter options",
|
|
34
|
+
cloup.option("-i", "--use-involves", is_flag=True, default=False, help="collect prs where the users are the author or assignee or mentioned or commented"),
|
|
35
|
+
cloup.option("-r", "--use-reviewed-by", is_flag=True, default=False, help="collect prs where the users reviewed them"),
|
|
36
|
+
cloup.option("-rr", "--use-review-requested", is_flag=True, default=False, help="collect prs where the users were requested a review"),
|
|
37
|
+
constraint=cloup.constraints.mutually_exclusive,
|
|
38
|
+
help="Collect alternative details for the users. If omitted then just collect the prs that a user has authored.",
|
|
39
|
+
)
|
|
40
|
+
@cloup.option_group(
|
|
41
|
+
"output options",
|
|
42
|
+
cloup.option("-o", "--output", default=None, help="The output location"),
|
|
43
|
+
cloup.option(
|
|
44
|
+
"-ot",
|
|
45
|
+
"--output-type",
|
|
46
|
+
type=click.Choice(["csv", "parquet", "duckdb"]),
|
|
47
|
+
default="csv",
|
|
48
|
+
show_default=True,
|
|
49
|
+
show_choices=True,
|
|
50
|
+
help="The output format",
|
|
51
|
+
),
|
|
52
|
+
cloup.option("-dc", "--drop-columns", multiple=True, help="Drop columns from the output dataframe"),
|
|
53
|
+
)
|
|
54
|
+
@cloup.option_group(
|
|
55
|
+
"authentication options",
|
|
56
|
+
cloup.option("-x", "--url", help="The URL of the Git provider to use"),
|
|
57
|
+
cloup.option("-t", "--token", help="The Authentication token to use"),
|
|
58
|
+
)
|
|
59
|
+
@cloup.version_option(__version__)
|
|
60
|
+
def main(**kwargs) -> None:
|
|
61
|
+
users: tuple[str] = kwargs.get("users")
|
|
62
|
+
token: str = kwargs.get("token") or github_environment.get_token()
|
|
63
|
+
url: str = kwargs.get("url") or github_environment.get_url()
|
|
64
|
+
output: str = kwargs.get("output")
|
|
65
|
+
output_type: str = kwargs.get("output_type")
|
|
66
|
+
before: datetime = kwargs.get("before")
|
|
67
|
+
after: datetime = kwargs.get("after")
|
|
68
|
+
drop_columns: list[str] = list(kwargs.get("drop_columns"))
|
|
69
|
+
use_updated: bool = kwargs.get("use_updated")
|
|
70
|
+
use_involves: bool = kwargs.get("use_involves")
|
|
71
|
+
use_reviewed_by: bool = kwargs.get("use_reviewed_by")
|
|
72
|
+
use_review_requested: bool = kwargs.get("use_review_requested")
|
|
73
|
+
reference: str = kwargs.get("reference")
|
|
74
|
+
|
|
75
|
+
logger.info("[bold green]PR Fiesta 🦜🥳")
|
|
76
|
+
|
|
77
|
+
spinner = Spinner("dots", text=Text("Loading", style=SPINNER_STYLE))
|
|
78
|
+
|
|
79
|
+
with Live(spinner, refresh_per_second=20, transient=True):
|
|
80
|
+
collector = GitHubCollector(token=token, url=url, spinner=spinner, drop_columns=drop_columns)
|
|
81
|
+
pr_frame = collector.collect(
|
|
82
|
+
*users,
|
|
83
|
+
after=after,
|
|
84
|
+
before=before,
|
|
85
|
+
use_updated=use_updated,
|
|
86
|
+
use_involves=use_involves,
|
|
87
|
+
use_reviewed_by=use_reviewed_by,
|
|
88
|
+
use_review_requested=use_review_requested,
|
|
89
|
+
reference=reference,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
if not pr_frame.empty:
|
|
93
|
+
logger.info("Found [bold green]%s[/bold green] pull requests!", pr_frame.shape[0])
|
|
94
|
+
|
|
95
|
+
output_frame(pr_frame, output_type, spinner=spinner, output_name=output)
|
|
96
|
+
logger.info("Time to analyze 🔎 See https://github.com/kiran94/prfiesta/blob/main/docs/analysis.md for some inspiration!")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
if __name__ == "__main__": # pragma: nocover
|
|
100
|
+
main()
|
|
File without changes
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import calendar
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import seaborn as sns
|
|
8
|
+
from matplotlib.axes import Axes
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
# English month names in calendar order; plot_overall_timeline uses them as the hue ordering of its bars.
_months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def plot_state_distribution(data: pd.DataFrame, **kwargs) -> Union[plt.Figure, plt.Axes, pd.DataFrame]:
    """
    Draws a histogram of the `state` column of the passed pull requests.

    Keyword Arguments
    -----------------
    ax: optional Axes to draw on; when provided its title is set to `title`
    palette: optional palette forwarded to seaborn
    title: title applied to `ax` (defaults to "State Distribution")
    hue: column used to colour the bars (defaults to "repository_name")

    Returns whatever `seaborn.histplot` returns.
    """
    target_axes: Optional[Axes] = kwargs.get("ax")
    chart_title: Optional[str] = kwargs.get("title", "State Distribution")

    # The title can only be applied when the caller handed us an Axes up front.
    if target_axes:
        target_axes.set_title(chart_title)

    return sns.histplot(
        data,
        x="state",
        hue=kwargs.get("hue", "repository_name"),
        ax=target_axes,
        palette=kwargs.get("palette"),
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def plot_overall_timeline(data: pd.DataFrame, **kwargs) -> Union[plt.Figure, plt.Axes, pd.DataFrame]:
    """
    Plots the number of pull requests created per year as a bar chart,
    split by month unless another hue is requested.

    `data` must have a datetime `created_at` column (the `.dt` accessor is used)
    and an `id` column, which is what gets counted.

    Keyword Arguments
    -----------------
    ax: optional Axes to draw on; when provided its title and legend position are set
    palette: optional palette forwarded to seaborn
    title: title applied to `ax` (defaults to "Overall Contributions")
    hue: column of the aggregated frame used to split the bars (defaults to "month").
         After aggregation only "month", "year" and "id" exist.

    Returns whatever `seaborn.barplot` returns.
    """
    ax: Optional[Axes] = kwargs.get("ax")
    palette: Optional[str] = kwargs.get("palette")
    title: Optional[str] = kwargs.get("title", "Overall Contributions")
    hue: Optional[str] = kwargs.get("hue", "month")

    # Work on a copy so the helper columns never leak into the caller's frame.
    temp = data.copy()
    temp["month"] = temp["created_at"].dt.month_name()
    temp["year"] = temp["created_at"].dt.year.astype(str)

    temp = temp.groupby(["month", "year"])["id"].count().reset_index()

    # X Axis Ordering (Year): order numerically while keeping the string labels used in the frame
    x_order = sorted(temp["year"].unique().tolist(), key=int)

    # Hue Ordering (Months): _months is already in calendar order, so it is used directly
    # instead of being re-sorted through the locale-dependent calendar.month_name.
    # The month ordering is only meaningful when the hue really is the month column;
    # forcing it onto another hue would leave seaborn with no matching levels to draw.
    hue_order = list(_months) if hue == "month" else None

    p = sns.barplot(temp, x="year", y="id", hue=hue, ax=ax, order=x_order, hue_order=hue_order, palette=palette)

    if ax:
        ax.set_title(title)
        ax.legend(loc="upper left")

    return p
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def plot_author_associations(data: pd.DataFrame, **kwargs) -> Union[plt.Figure, plt.Axes, pd.DataFrame]:
    """
    Draws a pie chart of how many pull requests fall under each `author_association`.

    The size of each slice is the number of non-null `id` values in that group.

    Keyword Arguments
    -----------------
    ax: optional Axes to draw on
    palette: optional colormap forwarded to the pandas pie plot
    title: chart title (defaults to "Author Associations")

    Returns whatever the pandas `plot.pie` call returns.
    """
    # Naming the series "count" is what labels the plotted values.
    association_counts = data.groupby("author_association")["id"].count().rename("count")

    return association_counts.plot.pie(
        ax=kwargs.get("ax"),
        title=kwargs.get("title", "Author Associations"),
        legend=True,
        colormap=kwargs.get("palette"),
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def plot_conventional_commit_breakdown(data: pd.DataFrame, **kwargs) -> Union[plt.Figure, plt.Axes, pd.DataFrame]:
    """
    Plots, per repository, how many pull request titles follow the
    conventional commit format, broken down by commit type.

    A title counts when it matches `<type>(<scope>): <subject>` where the scope is optional
    and the type is one of feat, fix, docs, style, refactor, test, chore, build, ci or perf.

    Keyword Arguments
    -----------------
    ax: optional Axes to draw on; when provided its title and legend position are set
    palette: optional palette forwarded to seaborn
    title: title applied to `ax` (defaults to "Conventional Commit Breakdown")
    hue: column used to split the bars (defaults to "type")

    Returns whatever `seaborn.barplot` returns, or None (after logging a warning)
    when no title matched the conventional commit format.
    """
    ax: Optional[Axes] = kwargs.get("ax")
    palette: Optional[str] = kwargs.get("palette")
    title: Optional[str] = kwargs.get("title", "Conventional Commit Breakdown")
    hue: Optional[str] = kwargs.get("hue", "type")

    extracted = data["title"].str.extract(
        r"^(?P<type>feat|fix|docs|style|refactor|test|chore|build|ci|perf)(\((?P<scope>[A-Za-z-]+)\))?: (?P<subject>[^\n]+)$"
    )

    # Only the commit type is needed for the breakdown; rows whose title did not match have no type.
    conventional_commit_frame = pd.concat([extracted[["type"]], data["repository_name"]], axis=1)
    conventional_commit_frame = conventional_commit_frame.dropna(subset=["type"])

    if conventional_commit_frame.empty:
        logger.warning("passed data did not seem to have any conventional commits")
        return None

    # Count rows per group with size(). Counting the non-null scope values instead would
    # silently ignore every title written without a scope, e.g. "fix: handle empty frame".
    type_count = conventional_commit_frame.groupby(["type", "repository_name"]).size().reset_index(name="count")
    type_count = type_count.sort_values(by="count", ascending=False)

    p = sns.barplot(type_count, y="repository_name", x="count", hue=hue, ax=ax, palette=palette)

    if ax:
        ax.set_title(title)
        ax.legend(loc="upper right")

    return p
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def plot_reactions(data: pd.DataFrame, **kwargs) -> Union[plt.Figure, plt.Axes, pd.DataFrame]:
    """
    Draws a scatter plot of the reaction counts found on the passed pull requests.

    Every column starting with "reactions." is considered, except "reactions.url".
    Rows whose "reactions.total_count" is not above zero are discarded and the total
    column itself is not plotted.

    Keyword Arguments
    -----------------
    ax: optional Axes to draw on; when provided its title and legend position are set
    palette: optional palette forwarded to seaborn
    title: title applied to `ax` (defaults to "Reactions")
    hue: optional hue forwarded to seaborn
    threshold: when truthy, values that are not below it are masked out before plotting

    Returns whatever `seaborn.scatterplot` returns, or None (after logging a warning)
    when no row has any reaction.
    """
    target_axes: Optional[Axes] = kwargs.get("ax")
    upper_bound: Optional[int] = kwargs.get("threshold")

    wanted = [name for name in data.columns.tolist() if name.startswith("reactions.") and name != "reactions.url"]

    reactions = data[wanted]
    has_reactions = reactions["reactions.total_count"] > 0
    reactions = reactions[has_reactions].drop(columns=["reactions.total_count"])

    if reactions.empty:
        logger.warning("passed data did not seem to have any reactions 🙁")
        return None

    if upper_bound:
        # Cells at or above the threshold become NaN rather than dropping the whole row.
        reactions = reactions.where(reactions < upper_bound)

    plotted = sns.scatterplot(reactions, ax=target_axes, palette=kwargs.get("palette"), hue=kwargs.get("hue"))

    if target_axes:
        target_axes.set_title(kwargs.get("title", "Reactions"))
        target_axes.legend(loc="upper right")

    return plotted
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from IPython.display import HTML, DisplayObject
|
|
5
|
+
from natural.date import duration
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _enrich_pr_link(data: pd.DataFrame) -> pd.DataFrame:
|
|
9
|
+
def make_link(row: pd.Series) -> str:
|
|
10
|
+
return f'<a href="{row["html_url"]}">{row["title"]}</a>'
|
|
11
|
+
|
|
12
|
+
data["title"] = data.apply(make_link, axis=1)
|
|
13
|
+
return data.drop(columns="html_url")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def view_pull_requests(data: pd.DataFrame, **kwargs) -> DisplayObject:
    """
    Builds a compact view of the passed pull requests with clickable titles.

    The view holds the number, linked title, repository name and last update of each pull request.

    Keyword Arguments
    -----------------
    as_frame: return the underlying DataFrame instead of an HTML display object (defaults to False)
    relative_dates: show `updated_at` as a duration relative to the current UTC time (defaults to True)
    head: when truthy, only keep this many leading rows
    """
    row_limit: int = kwargs.get("head")

    view_columns = ["number", "title", "repository_name", "updated_at", "html_url"]
    # Copy the selection so enriching the titles does not write into the caller's frame.
    view = _enrich_pr_link(data[view_columns].copy())

    if kwargs.get("relative_dates", True):
        reference_time = datetime.now(timezone.utc)
        view["updated_at"] = pd.to_datetime(view["updated_at"])
        view["updated_at"] = view["updated_at"].apply(duration, now=reference_time)

    if row_limit:
        view = view.head(row_limit)

    if kwargs.get("as_frame", False):
        return view

    # escape=False keeps the anchor tags produced for the titles intact.
    return HTML(view.to_html(escape=False, index=False))
|
|
File without changes
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from github import Github
|
|
7
|
+
from github.GithubException import RateLimitExceededException
|
|
8
|
+
from rich.spinner import Spinner
|
|
9
|
+
|
|
10
|
+
from prfiesta.environment import GitHubEnvironment
|
|
11
|
+
from prfiesta.spinner import update_spinner
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GitHubCollector:
    """
    Collects pull requests through the GitHub search API and returns them as a DataFrame.
    """

    def __init__(self, **kwargs) -> None:
        """
        Keyword Arguments
        -----------------
        token: authentication token; falls back to GitHubEnvironment.get_token()
        url: base url of the git provider; falls back to GitHubEnvironment.get_url()
        spinner: optional spinner that receives progress messages
        drop_columns: columns removed from the collected frame;
                      when empty or omitted "node_id" and "performed_via_github_app" are dropped
        """
        environment = GitHubEnvironment()
        token = kwargs.get("token") or environment.get_token()
        self._url = kwargs.get("url") or environment.get_url()

        self._github = Github(token, base_url=self._url)
        self._spinner: Spinner = kwargs.get("spinner")

        self._sort_column = ["updated_at"]
        self._drop_columns = kwargs.get("drop_columns") or ["node_id", "performed_via_github_app"]

        # Link-like columns are pushed to the end of the frame so the informative ones come first.
        self._move_to_end_columns = [
            "url",
            "repository_url",
            "html_url",
            "timeline_url",
            "labels_url",
            "comments_url",
            "events_url",
        ]
        self._datetime_columns = [
            "created_at",
            "updated_at",
            "closed_at",
        ]

    def collect(
        self,
        *users: str,
        after: Optional[datetime] = None,
        before: Optional[datetime] = None,
        use_updated: Optional[bool] = False,
        use_involves: Optional[bool] = False,
        use_reviewed_by: Optional[bool] = False,
        use_review_requested: Optional[bool] = False,
        reference: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Searches for pull requests matching the passed users and options.

        The arguments are forwarded to `_construct_query`, see there for their meaning.

        Returns a frame with one row per pull request, sorted by `updated_at` descending,
        with the datetime columns parsed and a `repository_name` column derived from `repository_url`.
        An empty frame is returned when nothing was found or when the API rate limit was hit.
        """
        query = self._construct_query(users, after, before, use_updated, use_involves, use_reviewed_by, use_review_requested, reference)

        update_spinner(f"Searching {self._url} with[bold blue] {query}", self._spinner, logger)

        try:
            pulls = self._github.search_issues(query=query)

            # Iterating the search result is what can trigger the rate limit, so it stays inside the try.
            # The payload is read from PyGithub's private `_rawData` attribute of each result.
            pull_request_data = [pr.__dict__["_rawData"] for pr in pulls]

        except RateLimitExceededException as e:
            logger.warning("🙇 You were rate limited by the GitHub API, try requesting less data.")
            logger.debug(e)
            return pd.DataFrame()

        if not pull_request_data:
            logger.warning("Did not find any results for this search criteria!")
            return pd.DataFrame()

        update_spinner("Post Processing", self._spinner, logger)
        pr_frame = pd.json_normalize(pull_request_data)

        pr_frame = pr_frame.drop(columns=self._drop_columns, errors="ignore")

        # A custom drop_columns may have removed columns the post processing relies on;
        # those steps are skipped instead of failing the whole collection.
        sort_columns = [col for col in self._sort_column if col in pr_frame.columns]
        if sort_columns:
            pr_frame = pr_frame.sort_values(by=sort_columns, ascending=False)

        pr_frame = self._parse_datetime_columns(pr_frame)

        if "repository_url" in pr_frame.columns:
            pr_frame["repository_name"] = pr_frame["repository_url"].str.extract(r"(.*)\/repos\/(?P<repository_name>(.*))")["repository_name"]
        else:
            logger.debug("Could not derive repository_name because repository_url did not exist")

        pr_frame = self._move_column_to_end(pr_frame)

        return pr_frame

    @staticmethod
    def _construct_query(
        users: Optional[List[str]],
        after: Optional[datetime] = None,
        before: Optional[datetime] = None,
        use_updated: Optional[bool] = False,
        use_involves: Optional[bool] = False,
        use_reviewed_by: Optional[bool] = False,
        use_review_requested: Optional[bool] = False,
        reference: Optional[str] = None,
    ) -> str:
        """
        Constructs a GitHub Search Query
        that returns pull requests made by the passed users and options.

        `use_involves`, `use_reviewed_by` and `use_review_requested` select the user qualifier
        (the first one set wins, `author` is used when none is set). `use_updated` switches
        the date qualifier from `created` to `updated`. `users` may be None or empty,
        in which case no user qualifier is added.

        Examples
        --------
        type:pr author:user1
        type:pr author:user2 created:<=2021-01-01
        type:pr author:user1 author:user2 created:2021-01-01..2021-03-01
        type:pr author:user2 updated:>=2021-01-01
        type:pr involves:user2
        type:pr reviewed-by:user1
        type:pr review-requested:user1
        type:pr in:title,body "PA-12765"

        All dates are inclusive.
        See GitHub Docs for full options https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests
        """
        query: List[str] = ["type:pr"]

        author_filter = "author"
        if use_involves:
            author_filter = "involves"
        elif use_reviewed_by:
            author_filter = "reviewed-by"
        elif use_review_requested:
            author_filter = "review-requested"

        logger.debug("using author filter %s", author_filter)

        # users is declared Optional, so None is treated like "no users".
        query.extend(f"{author_filter}:{u}" for u in users or ())

        time_filter = "updated" if use_updated else "created"

        logger.debug("using time filter %s", time_filter)

        if before and after:
            query.append(f"{time_filter}:{after.strftime('%Y-%m-%d')}..{before.strftime('%Y-%m-%d')}")
        elif before:
            query.append(f"{time_filter}:<={before.strftime('%Y-%m-%d')}")
        elif after:
            query.append(f"{time_filter}:>={after.strftime('%Y-%m-%d')}")

        if reference:
            query.append(f'in:title,body "{reference}"')

        return " ".join(query)

    def _move_column_to_end(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Moves the configured link columns to the end of the frame, in their configured order.

        The passed frame is modified in place and returned. Columns that do not exist are skipped.
        """
        for col in self._move_to_end_columns:
            try:
                values = df.pop(col)
            except KeyError:
                # This can happen if the user provides a custom _drop_columns which
                # removes the column before we can move it to the end
                logger.debug("Attempted to move column %s but it did not exist", col)
                continue

            # Re-inserting after the pop appends the column as the last one.
            df.insert(len(df.columns), col, values)

        return df

    def _parse_datetime_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Converts the configured datetime columns with `pd.to_datetime`; unparsable values are coerced.

        The passed frame is modified in place and returned. Columns that do not exist are skipped.
        """
        for col in self._datetime_columns:
            if col not in df.columns:
                # Same situation as in _move_column_to_end: a custom _drop_columns removed the column
                logger.debug("Attempted to parse datetime column %s but it did not exist", col)
                continue

            df[col] = pd.to_datetime(df[col], errors="coerce")

        return df
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":  # pragma: nocover
    # Manual check: build a sample query for two users over a fixed date range and log it.
    collector = GitHubCollector()
    sample_query = collector._construct_query(["kiran94", "hello"], datetime(2021, 1, 1), datetime(2021, 3, 1))
    logger.info(sample_query)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from github.Consts import DEFAULT_BASE_URL as GITHUB_DEFAULT_BASE_URL
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GitHubEnvironment:
    """Reads the GitHub connection settings from environment variables."""

    def get_token(self) -> str:
        """
        Gets the authentication token for this environment.

        GITHUB_ENTERPRISE_TOKEN takes precedence over GITHUB_TOKEN. A variable that is
        set but empty counts as not set, so an empty GITHUB_ENTERPRISE_TOKEN
        does not hide a usable GITHUB_TOKEN.

        Raises ValueError when neither variable holds a value.
        """
        token = os.environ.get("GITHUB_ENTERPRISE_TOKEN") or os.environ.get("GITHUB_TOKEN")
        if not token:
            raise ValueError("GITHUB_ENTERPRISE_TOKEN or GITHUB_TOKEN must be set")

        return token

    def get_url(self) -> str:
        """
        Gets the URL for the git provider.

        Uses GH_HOST when it holds a value, otherwise the PyGithub default base url.
        """
        return os.environ.get("GH_HOST") or GITHUB_DEFAULT_BASE_URL
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Literal, Optional
|
|
6
|
+
|
|
7
|
+
import duckdb
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from rich.spinner import Spinner
|
|
10
|
+
|
|
11
|
+
from prfiesta.spinner import update_spinner
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# The export formats output_frame knows how to write.
OUTPUT_TYPE = Literal["csv", "parquet", "duckdb"]
|
|
16
|
+
# Character class matching any one of : / \ * ? " < > | ; output_frame searches the output name with it when os.name is "nt".
WIN_ILLEGAL_FILENAME = r"[:\/\\\*\?\"\<\>\|]"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def output_frame(
    frame: pd.DataFrame,
    output_type: OUTPUT_TYPE,
    spinner: Spinner,
    output_name: Optional[str] = None,
    timestamp: Optional[datetime] = None,
) -> None:
    """
    Writes the passed frame to disk in the requested format.

    Parameters
    ----------
    frame: the data to export
    output_type: one of "csv", "parquet" or "duckdb"
    spinner: spinner that receives progress messages (may be falsy)
    output_name: target location; defaults to `export.<timestamp>.<output_type>`
    timestamp: used for the default file name and the duckdb table name; defaults to now

    For duckdb the frame is written to a new table `prfiesta_<timestamp>` inside the database at `output_name`.

    Raises ValueError when running on Windows with an illegal file name, or for an unknown output_type.
    """
    if not timestamp:
        timestamp = datetime.now()

    if not output_name:
        output_name = f"export.{timestamp.strftime('%Y%m%d%H%M%S')}.{output_type}"

    # Only the file name itself is validated: directory separators and a drive letter
    # are legitimate parts of a path and must not be reported as illegal characters.
    if os.name == "nt" and re.search(WIN_ILLEGAL_FILENAME, os.path.basename(output_name)):
        msg = f"{output_name} is an invalid filename on windows"
        raise ValueError(msg)

    update_spinner(f"Writing export to {output_name}", spinner, logger)

    if output_type == "csv":
        frame.to_csv(output_name, index=False)

    elif output_type == "parquet":
        frame.to_parquet(output_name, index=False)

    elif output_type == "duckdb":
        duckdb_table = f"prfiesta_{timestamp.strftime('%Y%m%d_%H%M%S')}"

        update_spinner(f"connecting to duckdb {output_name}", spinner, logger)
        conn = duckdb.connect(output_name)

        try:
            update_spinner(f"exporting to duckdb table {duckdb_table}", spinner, logger)
            # The query refers to the local variable `frame` by name, so the execute call must stay in this function.
            conn.execute(f"CREATE TABLE {duckdb_table} AS SELECT * FROM frame")  # noqa: S608 duckdb_table is always constructed internally
        finally:
            # Release the database file even when the export fails.
            conn.close()

    else:
        msg = f"unknown output_type {output_type}"
        raise ValueError(msg)

    logger.info("Exported to %s!", output_name)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
|
|
3
|
+
from rich.spinner import Spinner
|
|
4
|
+
|
|
5
|
+
from prfiesta import SPINNER_STYLE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def update_spinner(message: str, spinner: Spinner, logger: Logger) -> None:
    """
    Reports progress: the message is always logged at debug level and,
    when a spinner was passed, also shown as the spinner text.
    """
    logger.debug(message)

    if not spinner:
        # Nothing to animate, the debug log entry is all there is.
        return

    spinner.update(text=message, style=SPINNER_STYLE)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "prfiesta"
|
|
3
|
+
version = "0.2.1b228"
|
|
4
|
+
description = "Collect and Analyze Individual Contributor Pull Requests"
|
|
5
|
+
authors = [{ name = "kiran94" }]
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
requires-python = ">=3.9"
|
|
9
|
+
keywords = [ "pull-request", "pull-request-review", "performance-review" ]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"Operating System :: OS Independent",
|
|
13
|
+
"Topic :: Utilities",
|
|
14
|
+
"Topic :: Software Development",
|
|
15
|
+
"Topic :: Software Development :: Version Control :: Git"
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"pygithub>=1.58.1,<2.0.0",
|
|
19
|
+
"click>=8.1.3,<9.0.0",
|
|
20
|
+
"cloup>=2.1.0,<3.0.0",
|
|
21
|
+
"pandas>=2.0.1,<3.0.0",
|
|
22
|
+
"pyarrow>=18.1.0,<19.0.0",
|
|
23
|
+
"rich>=13.3.5,<14.0.0",
|
|
24
|
+
"seaborn>=0.12.2,<0.13.0",
|
|
25
|
+
"matplotlib>=3.7.1,<4.0.0",
|
|
26
|
+
"natural>=0.2.0,<0.3.0",
|
|
27
|
+
"urllib3<2",
|
|
28
|
+
"duckdb>=1.1.3,<2.0.0",
|
|
29
|
+
"numpy<2",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://pypi.org/project/prfiesta/"
|
|
34
|
+
Repository = "https://github.com/kiran94/prfiesta/pull/67"
|
|
35
|
+
Documentation = "https://github.com/kiran94/prfiesta/blob/main/README.md"
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
prfiesta = "prfiesta.__main__:main"
|
|
39
|
+
|
|
40
|
+
[dependency-groups]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=7.3.1,<8.0.0",
|
|
43
|
+
"pytest-cov>=4.0.0,<5.0.0",
|
|
44
|
+
"pytest-github-actions-annotate-failures>=0.2.0,<0.3.0",
|
|
45
|
+
"jupyterlab-widgets>=3.0.7,<4.0.0",
|
|
46
|
+
"jupyter-black>=0.3.4,<0.4.0",
|
|
47
|
+
"ipywidgets>=8.0.6,<9.0.0",
|
|
48
|
+
"pre-commit>=3.3.2,<4.0.0",
|
|
49
|
+
"ruff>=0.1.0,<0.2.0",
|
|
50
|
+
"jupyterlab>=4.3.4,<5.0.0",
|
|
51
|
+
"papermill>=2.6.0,<3.0.0",
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
[build-system]
|
|
55
|
+
requires = ["uv_build>=0.11.21,<0.12"]
|
|
56
|
+
build-backend = "uv_build"
|
|
57
|
+
|
|
58
|
+
[tool.uv.build-backend]
|
|
59
|
+
module-root = ""
|
|
60
|
+
module-name = "prfiesta"
|
|
61
|
+
|
|
62
|
+
[tool.ruff]
|
|
63
|
+
line-length = 160
|
|
64
|
+
target-version = "py38"
|
|
65
|
+
|
|
66
|
+
# All Rules: https://beta.ruff.rs/docs/rules/
|
|
67
|
+
select = [
|
|
68
|
+
"F", # pyflakes
|
|
69
|
+
"E", # pycodestyle
|
|
70
|
+
"C90", # macabe complexity
|
|
71
|
+
"I", # isort
|
|
72
|
+
"W", # warning
|
|
73
|
+
"N", # pep8-naming
|
|
74
|
+
"UP", # pyupgrade
|
|
75
|
+
"YTT", # flake8-2020
|
|
76
|
+
"ANN", # flake8-annotations
|
|
77
|
+
"S", # flake8-bandit
|
|
78
|
+
"BLE", # flake8-blind-except
|
|
79
|
+
"B", # flake8-bugbear
|
|
80
|
+
"A", # flake8-builtins
|
|
81
|
+
"COM", # flake8-commas
|
|
82
|
+
"C4", # flake8-comprehensions
|
|
83
|
+
# "DTZ", # flake8-datetimez
|
|
84
|
+
"T10", # flake8-debugger
|
|
85
|
+
"DJ", # flake8-django
|
|
86
|
+
"EM", # flake8-errmsg
|
|
87
|
+
"EXE", # flake8-executable
|
|
88
|
+
"ISC", # flake8-implicit-str-concat
|
|
89
|
+
"ICN", # flake8-import-conventions
|
|
90
|
+
"G", # flake8-logging-format
|
|
91
|
+
"INP", # flake8-no-pep420
|
|
92
|
+
"PIE", # flake8-pie
|
|
93
|
+
"T20", # flake8-print
|
|
94
|
+
"PYI", # flake8-pyi
|
|
95
|
+
"PT", # flake8-pytest-style
|
|
96
|
+
"Q", # flake8-quotes
|
|
97
|
+
"RSE", # flake8-raise
|
|
98
|
+
"RET", # flake8-return
|
|
99
|
+
# "SLF", # flake8-self
|
|
100
|
+
"SIM", # flake8-simplify
|
|
101
|
+
"TID", # flake8-tidy-imports
|
|
102
|
+
"INT", # flake8-gettext
|
|
103
|
+
"ARG", # flake8-unused-arguments
|
|
104
|
+
# "PTH", # flake8-use-pathlib
|
|
105
|
+
"ERA", # eradicate
|
|
106
|
+
"PD", # pandas-vet
|
|
107
|
+
"PGH", # pygrep-hooks
|
|
108
|
+
"PL", # pylint
|
|
109
|
+
"PLE", # error
|
|
110
|
+
"PLR", # refactor
|
|
111
|
+
"PLW", # warning
|
|
112
|
+
"NPY", # numpy-specific-rules
|
|
113
|
+
"RUF", # ruff-specific-rules
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
ignore = [
|
|
117
|
+
"S101", # asserts are used in unit tests
|
|
118
|
+
"EM101", # not a big deal if the error message is duplicated in the traceback
|
|
119
|
+
"ANN101", # ignore missing type annotation on self
|
|
120
|
+
"ANN003", # ignore missing type annotations on **kwargs
|
|
121
|
+
"PLR0913", # ignore for test cases with high level of paramtatization
|
|
122
|
+
"COM812", # disable to avoid conflict with formatter
|
|
123
|
+
"ISC001", # disable to avoid conflict with formatter
|
|
124
|
+
"RET504", # sometimes decreases code readability
|
|
125
|
+
]
|