praiser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. praiser-0.1.0/LICENSE +28 -0
  2. praiser-0.1.0/PKG-INFO +350 -0
  3. praiser-0.1.0/README.md +321 -0
  4. praiser-0.1.0/praiser/__init__.py +7 -0
  5. praiser-0.1.0/praiser/__main__.py +6 -0
  6. praiser-0.1.0/praiser/cache.py +60 -0
  7. praiser-0.1.0/praiser/cli.py +285 -0
  8. praiser-0.1.0/praiser/config.py +86 -0
  9. praiser-0.1.0/praiser/crossforge.py +188 -0
  10. praiser-0.1.0/praiser/data/__init__.py +1 -0
  11. praiser-0.1.0/praiser/data/known_projects.json +95 -0
  12. praiser-0.1.0/praiser/discovery.py +218 -0
  13. praiser-0.1.0/praiser/extractors/__init__.py +54 -0
  14. praiser-0.1.0/praiser/extractors/authors.py +107 -0
  15. praiser-0.1.0/praiser/extractors/base.py +106 -0
  16. praiser-0.1.0/praiser/extractors/codeowners.py +133 -0
  17. praiser-0.1.0/praiser/extractors/contributors.py +89 -0
  18. praiser-0.1.0/praiser/extractors/curated.py +41 -0
  19. praiser-0.1.0/praiser/extractors/enhancement_proposals.py +254 -0
  20. praiser-0.1.0/praiser/extractors/governance.py +127 -0
  21. praiser-0.1.0/praiser/extractors/llm_founders.py +50 -0
  22. praiser-0.1.0/praiser/extractors/maintainers.py +157 -0
  23. praiser-0.1.0/praiser/extractors/manifests.py +184 -0
  24. praiser-0.1.0/praiser/extractors/ownership.py +29 -0
  25. praiser-0.1.0/praiser/extractors/packages.py +66 -0
  26. praiser-0.1.0/praiser/extractors/subcomponents.py +76 -0
  27. praiser-0.1.0/praiser/extractors/web_roles.py +131 -0
  28. praiser-0.1.0/praiser/extractors/wikidata.py +95 -0
  29. praiser-0.1.0/praiser/forge/__init__.py +35 -0
  30. praiser-0.1.0/praiser/forge/_http.py +86 -0
  31. praiser-0.1.0/praiser/forge/base.py +265 -0
  32. praiser-0.1.0/praiser/forge/bitbucket.py +171 -0
  33. praiser-0.1.0/praiser/forge/cgit.py +95 -0
  34. praiser-0.1.0/praiser/forge/gitea.py +179 -0
  35. praiser-0.1.0/praiser/forge/gitee.py +175 -0
  36. praiser-0.1.0/praiser/forge/github.py +309 -0
  37. praiser-0.1.0/praiser/forge/gitlab.py +197 -0
  38. praiser-0.1.0/praiser/github_client.py +462 -0
  39. praiser-0.1.0/praiser/identity.py +25 -0
  40. praiser-0.1.0/praiser/llm.py +247 -0
  41. praiser-0.1.0/praiser/models.py +253 -0
  42. praiser-0.1.0/praiser/pipeline.py +313 -0
  43. praiser-0.1.0/praiser/popularity.py +109 -0
  44. praiser-0.1.0/praiser/progress.py +67 -0
  45. praiser-0.1.0/praiser/registries.py +260 -0
  46. praiser-0.1.0/praiser/registry.py +311 -0
  47. praiser-0.1.0/praiser/render.py +219 -0
  48. praiser-0.1.0/praiser.egg-info/PKG-INFO +350 -0
  49. praiser-0.1.0/praiser.egg-info/SOURCES.txt +93 -0
  50. praiser-0.1.0/praiser.egg-info/dependency_links.txt +1 -0
  51. praiser-0.1.0/praiser.egg-info/entry_points.txt +2 -0
  52. praiser-0.1.0/praiser.egg-info/requires.txt +12 -0
  53. praiser-0.1.0/praiser.egg-info/top_level.txt +1 -0
  54. praiser-0.1.0/pyproject.toml +53 -0
  55. praiser-0.1.0/setup.cfg +4 -0
  56. praiser-0.1.0/tests/test_attribute.py +69 -0
  57. praiser-0.1.0/tests/test_authors.py +65 -0
  58. praiser-0.1.0/tests/test_bitbucket_forge.py +108 -0
  59. praiser-0.1.0/tests/test_cgit_forge.py +70 -0
  60. praiser-0.1.0/tests/test_cli.py +75 -0
  61. praiser-0.1.0/tests/test_codeowners.py +40 -0
  62. praiser-0.1.0/tests/test_contributors.py +123 -0
  63. praiser-0.1.0/tests/test_crossforge.py +153 -0
  64. praiser-0.1.0/tests/test_curated.py +65 -0
  65. praiser-0.1.0/tests/test_discovery.py +42 -0
  66. praiser-0.1.0/tests/test_enhancement_proposals.py +96 -0
  67. praiser-0.1.0/tests/test_file_fetch.py +63 -0
  68. praiser-0.1.0/tests/test_forge_base.py +51 -0
  69. praiser-0.1.0/tests/test_gitea_forge.py +120 -0
  70. praiser-0.1.0/tests/test_gitee_forge.py +87 -0
  71. praiser-0.1.0/tests/test_github_client.py +58 -0
  72. praiser-0.1.0/tests/test_github_forge.py +149 -0
  73. praiser-0.1.0/tests/test_gitlab_forge.py +116 -0
  74. praiser-0.1.0/tests/test_governance.py +43 -0
  75. praiser-0.1.0/tests/test_involvement.py +35 -0
  76. praiser-0.1.0/tests/test_llm_founders.py +63 -0
  77. praiser-0.1.0/tests/test_maintainers.py +44 -0
  78. praiser-0.1.0/tests/test_manifests.py +130 -0
  79. praiser-0.1.0/tests/test_manual_repos.py +76 -0
  80. praiser-0.1.0/tests/test_multiforge.py +25 -0
  81. praiser-0.1.0/tests/test_ownership.py +29 -0
  82. praiser-0.1.0/tests/test_packages_extractor.py +78 -0
  83. praiser-0.1.0/tests/test_pipeline_helpers.py +21 -0
  84. praiser-0.1.0/tests/test_popularity.py +102 -0
  85. praiser-0.1.0/tests/test_progress.py +56 -0
  86. praiser-0.1.0/tests/test_registries.py +177 -0
  87. praiser-0.1.0/tests/test_registry.py +67 -0
  88. praiser-0.1.0/tests/test_render.py +167 -0
  89. praiser-0.1.0/tests/test_result_cache.py +76 -0
  90. praiser-0.1.0/tests/test_role_discovery.py +98 -0
  91. praiser-0.1.0/tests/test_subcomponents.py +59 -0
  92. praiser-0.1.0/tests/test_web_cache.py +105 -0
  93. praiser-0.1.0/tests/test_web_roles.py +89 -0
  94. praiser-0.1.0/tests/test_web_service.py +134 -0
  95. praiser-0.1.0/tests/test_wikidata.py +93 -0
praiser-0.1.0/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2026, Pearu Peterson
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its contributors
16
+ may be used to endorse or promote products derived from this software
17
+ without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
praiser-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,350 @@
1
+ Metadata-Version: 2.4
2
+ Name: praiser
3
+ Version: 0.1.0
4
+ Summary: Record the popular open-source projects where a person holds an elevated role (author, maintainer, steering council, standards author, core contributor), with evidence.
5
+ Author-email: Pearu Peterson <pearu.peterson@gmail.com>
6
+ License-Expression: BSD-3-Clause
7
+ Project-URL: Homepage, https://github.com/openteams-ai/praiser
8
+ Project-URL: Repository, https://github.com/openteams-ai/praiser
9
+ Project-URL: Issues, https://github.com/openteams-ai/praiser/issues
10
+ Keywords: open-source,maintainer,contributions,github,roles
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: Environment :: Console
16
+ Classifier: Topic :: Software Development
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Provides-Extra: http
21
+ Requires-Dist: httpx>=0.27; extra == "http"
22
+ Provides-Extra: llm
23
+ Requires-Dist: anthropic>=0.40; extra == "llm"
24
+ Provides-Extra: yaml
25
+ Requires-Dist: pyyaml>=6; extra == "yaml"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # praiser
31
+
32
+ [![CI](https://github.com/openteams-ai/praiser/actions/workflows/ci.yml/badge.svg)](https://github.com/openteams-ai/praiser/actions/workflows/ci.yml)
33
+
34
+ **🌟 Try the web demo: <https://praiser.streamlit.app/>**
35
+
36
+ Given a username, **praiser** records the popular open-source projects where
37
+ that person holds an **elevated role** — author/creator, maintainer, code owner,
38
+ steering-council member, standards (PEP/RFC) author, or core contributor — with
39
+ a clickable **evidence link** and a **confidence** score for every claim. Plain
40
+ drive-by contributors are intentionally excluded (the record would otherwise be
41
+ enormous and low-signal). By default it prints a compact **highlights** summary;
42
+ `--format md|json` gives the full per-project report.
43
+
44
+ Projects record roles in many different ways — a `CODEOWNERS` file, a
45
+ `MAINTAINERS` list, Kubernetes `OWNERS` YAML, a `GOVERNANCE.md` page, a package
46
+ manifest's author field, a numbered enhancement-proposal series with `Author:`
47
+ headers, a team page on the project's website, the commit history, package
48
+ registries (PyPI/npm/crates), or **Wikidata** creator/developer claims (matched
49
+ by GitHub handle). `praiser` figures out **which convention each project uses**
50
+ rather than assuming one, and corroborates signals that a fork or vendored copy
51
+ could fake.
52
+
53
+ It scans **GitHub** by default, plus **GitLab** (`--forge gitlab`), **Codeberg**
54
+ / any Gitea/Forgejo host (`--forge codeberg`), **Gitee** (`--forge gitee`),
55
+ **Bitbucket** (`--forge bitbucket`), and API-less **cgit** hosts like kernel.org
56
+ or Savannah (`--forge cgit`) — including **self-hosted instances** with
57
+ `--forge-url` (e.g. `--forge gitlab --forge-url https://gitlab.gnome.org`). The
58
+ pipeline talks to a neutral `Forge` interface, so adding another host is a
59
+ self-contained addition.
60
+
61
+ People use different usernames on different forges, so `--cross-forge` follows
62
+ the links a person publishes on their own profile — and on the personal site
63
+ those profiles point to — to their accounts elsewhere, keeping only links
64
+ confirmed either **bidirectionally** (the other profile links back) or through
65
+ an **owned personal-site hub** (a site reached from, and linking back to, a
66
+ confirmed account, that also lists the other account with a matching
67
+ handle/name), and merges everything into one record. Because the links are owner-published and
68
+ mutually confirmed, it never falsely merges two different people (it may
69
+ under-merge someone who hasn't cross-linked, which is safe). `--also-forge
70
+ FORGE:LOGIN` adds an identity explicitly when you'd rather not rely on links.
71
+
72
+ ## Install
73
+
74
+ ```bash
75
+ pip install praiser # core (stdlib only)
76
+ pip install 'praiser[http]' # + httpx (faster, pooled HTTP; recommended)
77
+ pip install 'praiser[http,llm,yaml]' # + Claude fallback + YAML role files
78
+ ```
79
+
80
+ The core has **no dependencies** (it runs on the stdlib), so `pip install praiser`
81
+ is enough to get the `praiser` command. The extras add: `http` (httpx; without it
82
+ the tool falls back to urllib), `llm` (Claude fallback for ambiguous governance prose), `yaml`
83
+ (Kubernetes `OWNERS` / YAML-front-matter proposals).
84
+
85
+ From a checkout, for development:
86
+
87
+ ```bash
88
+ pip install -e '.[http,llm,yaml,dev]' # editable + all extras + tests
89
+ ```
90
+
91
+ Requires Python 3.11+ (for the stdlib `tomllib`).
92
+
93
+ ## Usage
94
+
95
+ ```bash
96
+ export GITHUB_TOKEN=ghp_... # a PAT; raises rate limits and enables search
97
+ praiser torvalds # default: the highlights summary (below)
98
+ praiser gvanrossum --format md # the full report (Markdown)
99
+ praiser gvanrossum --format json -o gvanrossum.json # full report as JSON
100
+ praiser someuser --no-discover-roles --no-llm # skip the LLM/web features
101
+ ```
102
+
103
+ By default `praiser <username>` prints a compact **highlights** summary — the
104
+ top roles, one line each, plus breadth stats. Use `--format md|json` for the
105
+ full per-project report with evidence links, or `--highlights N` to change the
106
+ count.
107
+
108
+ ```
109
+ pearu — top 8 highlights:
110
+ - pytorch/pytorch — Maintainer (101k★, conf 0.90)
111
+ - numpy/numpy — Maintainer (32k★, conf 0.90)
112
+ - scipy/scipy — Maintainer (15k★, conf 0.90)
113
+ - heavyai/heavydb — Core contributor (3k★, conf 0.80)
114
+
115
+ …plus N more elevated-role project(s); M smaller but widely-used project(s) with a notable role.
116
+ Reach: T project(s) across C communities (distinct orgs).
117
+ ```
118
+
119
+ The footer summarises breadth beyond the top roles: the smaller-but-widely-used
120
+ projects where the user also holds a notable role, and the **community reach**
121
+ (distinct organisations) — a proxy for the potential to introduce ideas widely.
122
+
123
+ `praiser <username>` is meant to be sufficient on its own: role auto-discovery
124
+ and registry persistence are **on by default** (auto-discovery activates only
125
+ when LLM credentials are present, and degrades silently otherwise).
126
+
127
+ ### GitHub token
128
+
129
+ Without a token GitHub allows only ~60 requests/hour, which is not enough.
130
+ Provide one via `--token`, the `GITHUB_TOKEN`/`GH_TOKEN` env var, or simply be
131
+ logged into the [`gh` CLI](https://cli.github.com) (`gh auth login`) — the tool
132
+ falls back to `gh auth token` automatically.
133
+
134
+ For the optional LLM features (role auto-discovery — on by default, `--no-discover-roles` to disable — and the governance prose
135
+ fallback) set an Anthropic API key — create one at
136
+ **https://console.anthropic.com/settings/keys**, then
137
+ `export ANTHROPIC_API_KEY=...` and install the extra (`pip install
138
+ 'praiser[llm]'`).
139
+
140
+ Create a GitHub Personal Access Token at **https://github.com/settings/tokens**:
141
+
142
+ * *classic* — no scopes are needed for public data; add `repo` (private repos)
143
+ and `read:org` (resolve `@org/team` membership in CODEOWNERS) for full
144
+ coverage;
145
+ * *fine-grained* — read-only **Contents** + **Members** permissions.
146
+
147
+ ```bash
148
+ export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
149
+ ```
150
+
151
+ ### Rate limits & performance
152
+
153
+ A token is capped at 5,000 REST requests/hour and that ceiling can't be raised
154
+ for a PAT (only GitHub Apps / Enterprise Cloud go higher). To stay under it the
155
+ tool:
156
+
157
+ * **fetches file contents via GraphQL in batches** — GraphQL is a *separate*
158
+ 5,000-points/hour bucket, so the bulk of the work (reading CODEOWNERS,
159
+ manifests, and possibly hundreds of proposal files) doesn't touch the REST
160
+ limit, and many files come back in one request;
161
+ * **caches every request** so re-runs and resumed runs are nearly free;
162
+ * **drops forks** and only deep-scans plausible candidates.
163
+
164
+ If a run is rate-limited it stops early, tells you how long to wait, and the
165
+ cache preserves what already succeeded — so re-running finishes the job.
166
+
167
+ ```
168
+ praiser <username>
169
+ [--forge github|codeberg|gitlab|gitee|bitbucket|cgit] code host (default: github)
170
+ [--forge-url URL] self-hosted instance for --forge gitlab|codeberg|cgit
171
+ [--forge-name LABEL] short label for the --forge-url instance
172
+ [--cross-forge] follow verified profile links to the person's other
173
+ forges and merge into one record
174
+ [--also-forge FORGE:LOGIN] also scan this identity on another forge (repeatable)
175
+ [--min-stars N] popularity threshold (default 50)
176
+ [--highlights [N]] top-N highlights summary (this is the DEFAULT view; N=8)
177
+ [--format md|json] full per-project report instead of the highlights
178
+ [--token TOKEN] or GITHUB_TOKEN / GH_TOKEN
179
+ [--cache-dir DIR] default ~/.cache/praiser
180
+ [--registry FILE] known-projects file (default: ~/.local/share/praiser/)
181
+ [--no-save-registry] don't persist popularity + discovered role sources
182
+ [--no-discover-roles] don't web-search for role pages (default: on w/ LLM)
183
+ [--no-wikidata] don't derive creator/developer roles from Wikidata
184
+ [--no-package-registries] skip PyPI/npm/crates.io lookups (default: on)
185
+ [--no-llm] disable all Claude features
186
+ [--add-repo OWNER/REPO] force-scan a repo discovery missed (repeatable)
187
+ [--include-private] also scan private repos (default: skip them)
188
+ [--contributor-pages N] contributors API pages/repo, 100 each (default: 2)
189
+ [-j N | --jobs N] candidates scanned concurrently (default: 8)
190
+ [-o FILE] write output to a file instead of stdout
191
+ [-v] detailed per-repo logging
192
+ [-q] suppress the live progress display
193
+ ```
194
+
195
+ On an interactive terminal the tool shows live progress on stderr
196
+ (`scanning repo 42/107 …`) so you can see it working; output (JSON/Markdown)
197
+ still goes to stdout. Progress is automatically suppressed when stderr is
198
+ redirected, with `-q`, or in `-v` mode (which prints detailed logs instead).
199
+
200
+ JSON is the source of truth; Markdown is a human-readable view. Every claim
201
+ carries an **evidence link** (file/page URL) and a **confidence** score.
202
+
203
+ ## Web demo
204
+
205
+ A [Streamlit](https://streamlit.io) web UI wraps the same engine: type a
206
+ username, pick a forge, and get the ranked record with evidence links — with
207
+ instant **view / top-N / min-stars** controls, a live progress bar, and a
208
+ "recent scans" picker. Collected results are shared across sessions via a
209
+ durable cache, so repeat lookups are fast.
210
+
211
+ - **Hosted demo:** <https://praiser.streamlit.app/>
212
+ - **Run locally or deploy your own:** see [`web/README.md`](web/README.md).
213
+
214
+ The web layer is split into a framework-agnostic core (`web/core` — a
215
+ `praise()` service + cache) and the Streamlit frontend (`web/streamlit`), so a
216
+ different frontend (FastAPI, Gradio, …) can reuse the core unchanged.
217
+
218
+ ## How it works
219
+
220
+ 1. **Identity resolution** — assemble `{logins, names, emails}` from the
221
+ profile. Handle/email matches are high-confidence; name-only matches are weak.
222
+ 2. **Discovery (wide net)** — owned repos, org repos, contributed-to repos
223
+ (over-collected on purpose), **commit search** (`author:`, catches old
224
+ involvement the contribution graph has dropped), code search for the handle
225
+ in role files, **name search** in `AUTHORS`/`THANKS`/`CONTRIBUTORS`,
226
+ **package registries** (packages the user maintains on npm/crates.io, whose
227
+ source repos are pulled in — catches projects where the role is "package
228
+ maintainer" rather than "top committer"; `--no-package-registries` to skip),
229
+ and curated registry seeds.
230
+ Forks (which inherit upstream role files) and private repos are dropped
231
+ here — a public "popular projects" record shouldn't surface or leak private
232
+ repos. Use `--include-private` to scan them anyway. If the net still misses a
233
+ project (e.g. a private-dev repo, or one whose history GitHub doesn't
234
+ attribute), name it with `--add-repo OWNER/REPO` — it's force-scanned and
235
+ force-included, with the role still detected automatically.
236
+ 3. **Role attribution** — a registry of pluggable [extractors](praiser/extractors)
237
+ (`ownership`, `codeowners`, `maintainers`, `manifests`, `enhancement_proposals`,
238
+ `governance`, `contributors`, `subcomponents`, `authors`, `web_roles`,
239
+ `packages`). The
240
+ `contributors` signal measures size by commits **and** merged-PR count
241
+ (robust to squash/ghstack one-commit-per-PR workflows and unlinked commit
242
+ emails); `subcomponents` credits leading/authoring a *part* of a monorepo via
243
+ commit-path analysis (e.g. f2py in NumPy, sparse tensors in PyTorch) — seeded
244
+ in the registry and extendable with `--add-repo owner/repo:path`. `packages`
245
+ credits **maintainer** of an npm/crates.io package (keyed on the user's login)
246
+ and **author** of a PyPI distribution (matched on the author/maintainer name,
247
+ so a popular package isn't mis-credited to a mere contributor) — only when
248
+ the package itself names the repo as its source, which guards against
249
+ registry-handle collisions.
250
+ (A LOC-diff size axis is intentionally deferred — noisy with generated/vendored
251
+ code and costly to compute — until a need justifies the extra dimension.)
252
+ A repo under the user's
253
+ own account is attributed as **author/creator**, and manifest `authors` vs
254
+ `maintainers` fields map to the author vs maintainer roles — so a user's own
255
+ projects read "Author", not merely "core contributor". Structured files are parsed
256
+ deterministically; ambiguous prose falls back to Claude **only after** a
257
+ keyword/regex pass. `contributors` records a **core-contributor** role for
258
+ substantial committers to popular/widely-used repos (catches historical
259
+ maintainers and authors of major components, e.g. f2py in NumPy). Role-file
260
+ matches (`CODEOWNERS`/`AUTHORS`) are corroborated with **copy-resistant**
261
+ signals — affiliation or being the canonical popular project — so a repo that
262
+ *vendored* an upstream's history and role files isn't a false positive.
263
+ 4. **Popularity filter** — `--min-stars`, with an override so high-signal roles
264
+ on smaller-but-notable standards projects survive. Elevated-role projects
265
+ that miss the bar but are **widely used and maintained** (real fork
266
+ engagement + recently pushed) are reported as a secondary group with a count.
267
+ 5. **Render** — ranked by popularity × role weight × confidence. Live
268
+ rate-limit dynamics (REST/GraphQL remaining) are shown during the scan.
269
+
270
+ ## The known-projects registry
271
+
272
+ [`praiser/data/known_projects.json`](praiser/data/known_projects.json) stores
273
+ popular/important projects together with:
274
+
275
+ * **`role_conventions`** — how that project records roles *in the repo* (which
276
+ extractor + path + header format), so extractors can parse directly instead of
277
+ re-detecting, and curated knowledge is reusable;
278
+ * **`role_sources`** — **authoritative web pages** that list role holders, with
279
+ the role each confers. Many projects record maintainers/steering councils on a
280
+ site, not in a repo file, and the format varies wildly — so you point at the
281
+ exact URL rather than have the tool guess. The `web_roles` extractor fetches
282
+ each page and matches the user by GitHub handle (a `github.com/<handle>` link)
283
+ or full name. Example:
284
+ ```json
285
+ "numpy/numpy": {
286
+ "role_sources": [
287
+ {"url": "https://numpy.org/teams/", "role": "maintainer", "label": "NumPy team"},
288
+ {"url": "https://numpy.org/about/", "role": "steering_council", "label": "Steering Council"}
289
+ ]
290
+ }
291
+ ```
292
+ This is more authoritative than commit-count heuristics — it reflects the
293
+ project's own statement of who holds the role — and it's why a vendored *copy*
294
+ of a project (which carries the upstream's commit history, making the user look
295
+ like a heavy committer) is not mistaken for a real role: role-file and
296
+ contributor signals are trusted only on the user's own/org repos or the
297
+ canonical popular project, never on a small unaffiliated copy.
298
+ * **`popularity`** — cached/curated stars/forks plus `min_stars_override` for
299
+ high-signal-but-small standards projects;
300
+ * **`importance`** — a human label (`critical`, `high`, ...).
301
+
302
+ Point `--registry mine.json` at your own file to extend or override the seed;
303
+ by default, observed popularity **and any web-discovered role sources** (from role
304
+ auto-discovery) are written back (pass `--no-save-registry` to disable this) — so a one-off discovery becomes
305
+ reusable curated knowledge. Authoritative roles are conservative: high-authority
306
+ roles like steering council require a GitHub-**handle** match on the page (not
307
+ just a name, which is too easily a founder/credit mention).
308
+
309
+ Discovery results are also cached (the web-search call and fetched pages), so
310
+ re-runs don't re-search even with `--no-save-registry`.
311
+
312
+ ### Enhancement-proposal generalization
313
+
314
+ PEP / NEP / SPEC / JEP and friends share one shape: a folder of numbered
315
+ documents with an `Author:` (or `:Author:`, or YAML front-matter) header. They
316
+ are handled by **one** extractor parameterized by `(path, header_format)`, which
317
+ also **auto-detects** the pattern when a repo has a directory of numbered
318
+ `*.rst`/`*.md` files with author metadata.
319
+
320
+ ## Development
321
+
322
+ ```bash
323
+ pip install -e '.[dev]'
324
+ pytest # offline parser tests, no network
325
+ ```
326
+
327
+ Each extractor keeps its parsing logic in a pure function (e.g.
328
+ `parse_codeowners`, `parse_proposal_header`, `parse_owners_yaml`) so tests run
329
+ fully offline.
330
+
331
+ ### Releasing to PyPI
332
+
333
+ The version is single-sourced from `praiser.__version__`. To cut a release:
334
+
335
+ 1. Bump `__version__` in `praiser/__init__.py`; commit.
336
+ 2. Create a **GitHub Release** with tag `vX.Y.Z` (matching the version).
337
+ 3. The `publish.yml` workflow builds the sdist + wheel and uploads them to PyPI
338
+ via **Trusted Publishing** (OIDC — no API token stored).
339
+
340
+ **One-time PyPI setup** (before the first release): on PyPI, add a *trusted
341
+ publisher* for the project — owner `openteams-ai`, repo `praiser`, workflow
342
+ `publish.yml`, environment `pypi`. (For the very first upload you can instead
343
+ `python -m build && twine upload dist/*` with a PyPI token, then switch to
344
+ trusted publishing.)
345
+
346
+ ## Author & license
347
+
348
+ Created by **Pearu Peterson** (pearu.peterson@gmail.com), with assistance from
349
+ **Claude** (Anthropic). Licensed under the **BSD 3-Clause** license — see
350
+ [LICENSE](LICENSE).