pyOpenSourceProjects 0.4.1__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/.github/workflows/build.yml +4 -4
  2. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/.github/workflows/upload-to-pypi.yml +2 -2
  3. pyopensourceprojects-0.5.1/AGENTS.md +210 -0
  4. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/PKG-INFO +7 -3
  5. pyopensourceprojects-0.5.1/osprojects/__init__.py +1 -0
  6. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/osprojects/check_project.py +29 -12
  7. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/osprojects/checkos.py +6 -2
  8. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/osprojects/github_api.py +143 -6
  9. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/osprojects/osproject.py +31 -5
  10. pyopensourceprojects-0.5.1/osprojects/version.py +16 -0
  11. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/pyproject.toml +11 -3
  12. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/tests/test_github_api.py +43 -1
  13. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/tests/test_osproject.py +0 -1
  14. pyopensourceprojects-0.4.1/osprojects/__init__.py +0 -1
  15. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/.gitignore +0 -0
  16. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/.project +0 -0
  17. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/.pydevproject +0 -0
  18. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/LICENSE +0 -0
  19. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/README.md +0 -0
  20. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/mkdocs.yml +0 -0
  21. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/osprojects/editor.py +0 -0
  22. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/blackisort +0 -0
  23. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/doc +0 -0
  24. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/install +0 -0
  25. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/installAndTest +0 -0
  26. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/release +0 -0
  27. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/scripts/test +0 -0
  28. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/tests/__init__.py +0 -0
  29. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/tests/basetest.py +0 -0
  30. {pyopensourceprojects-0.4.1 → pyopensourceprojects-0.5.1}/tests/test_github.py +0 -0
@@ -20,13 +20,13 @@ jobs:
20
20
  matrix:
21
21
  #os: [ubuntu-latest, macos-latest, windows-latest]
22
22
  os: [ubuntu-latest]
23
- #python-version: [ '3.11', '3.12', '3.13' ]
24
- python-version: ["3.12"]
23
+ #python-version: [ '3.10', '3.11', '3.12', '3.13' ]
24
+ python-version: ["3.10"]
25
25
 
26
26
  steps:
27
- - uses: actions/checkout@v4
27
+ - uses: actions/checkout@v6
28
28
  - name: Set up Python ${{ matrix.python-version }}
29
- uses: actions/setup-python@v5
29
+ uses: actions/setup-python@v6
30
30
  with:
31
31
  python-version: ${{ matrix.python-version }}
32
32
  - name: Install dependencies
@@ -11,9 +11,9 @@ jobs:
11
11
  # IMPORTANT: this permission is mandatory for trusted publishing
12
12
  id-token: write
13
13
  steps:
14
- - uses: actions/checkout@v4
14
+ - uses: actions/checkout@v6
15
15
  - name: Set up Python
16
- uses: actions/setup-python@v5
16
+ uses: actions/setup-python@v6
17
17
  with:
18
18
  python-version: '3.x'
19
19
  - name: Install dependencies
@@ -0,0 +1,210 @@
1
+ # Agent Guidelines for pyOpenSourceProjects
2
+
3
+ This file provides guidelines and commands for agentic coding agents working in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Python library for checking open source projects for standard compliance (README, GitHub workflow, pyproject.toml). Creates badges for projects.
8
+
9
+ - **Main package**: `osprojects/`
10
+ - **Tests**: `tests/`
11
+ - **Python**: 3.10+
12
+
13
+ ## Build & Installation
14
+
15
+ ```bash
16
+ # Install (or upgrade) the package from the local checkout
17
+ pip install . -U
18
+
19
+ # Install with test dependencies
20
+ pip install -U ".[test]"
21
+ ```
22
+
23
+ ## Testing
24
+
25
+ ### Run All Tests
26
+
27
+ ```bash
28
+ # Using unittest discover (default)
29
+ python3 -m unittest discover
30
+
31
+ # Using the test script
32
+ ./scripts/test
33
+ ```
34
+
35
+ ### Run Single Test
36
+
37
+ ```bash
38
+ # Run a specific test file
39
+ python -m unittest tests.test_github
40
+
41
+ # Run a specific test class
42
+ python -m unittest tests.test_github.GitHubTest
43
+
44
+ # Run a specific test method
45
+ python -m unittest tests.test_github.GitHubTest.test_example
46
+
47
+ # Module-wise testing (runs each test file separately)
48
+ ./scripts/test --module
49
+ ```
50
+
51
+ ### Other Test Runners
52
+
53
+ ```bash
54
+ # Using green
55
+ ./scripts/test --green
56
+
57
+ # Using tox
58
+ ./scripts/test --tox
59
+ ```
60
+
61
+ ## Code Formatting
62
+
63
+ This project uses multiple formatters. Run all before committing:
64
+
65
+ ```bash
66
+ # Format code with black, sort imports with isort, format docstrings with docformatter
67
+ ./scripts/blackisort
68
+ ```
69
+
70
+ Individual commands:
71
+ ```bash
72
+ # Sort imports
73
+ isort tests/*.py osprojects/*.py
74
+
75
+ # Format code
76
+ black tests/*.py osprojects/*.py
77
+
78
+ # Format docstrings
79
+ docformatter --in-place tests/*.py osprojects/*.py
80
+ ```
81
+
82
+ ## Code Style Guidelines
83
+
84
+ ### General
85
+
86
+ - Follow [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
87
+ - Use type hints where appropriate
88
+ - Maximum line length: 88 characters (Black default)
89
+
90
+ ### Imports
91
+
92
+ - Use `isort` for import sorting
93
+ - Order: standard library, third-party, local
94
+ - Example:
95
+ ```python
96
+ import os
97
+ import time
98
+ from typing import Any, Optional
99
+
100
+ import requests
101
+ from git import Repo
102
+
103
+ from osprojects import __version__
104
+ from osprojects.github_api import GitHubAPI
105
+ ```
106
+
107
+ ### Docstrings
108
+
109
+ - Use Google-style docstrings
110
+ - Run `docformatter` before committing
111
+ - Example:
112
+ ```python
113
+ def fetch_data(url: str, timeout: int = 30) -> dict:
114
+ """Fetch data from the given URL.
115
+
116
+ Args:
117
+ url: The URL to fetch data from.
118
+ timeout: Request timeout in seconds.
119
+
120
+ Returns:
121
+ A dictionary containing the response data.
122
+
123
+ Raises:
124
+ requests.RequestException: If the request fails.
125
+ """
126
+ ```
127
+
128
+ ### Naming Conventions
129
+
130
+ - **Functions/methods**: `snake_case` (e.g., `fetch_data`, `get_project_info`)
131
+ - **Classes**: `PascalCase` (e.g., `GitHubAPI`, `ProjectChecker`)
132
+ - **Constants**: `UPPER_SNAKE_CASE` (e.g., `DEFAULT_TIMEOUT`, `API_VERSION`)
133
+ - **Private methods**: prefix with `_` (e.g., `_internal_method`)
134
+
135
+ ### Error Handling
136
+
137
+ - Use specific exception types
138
+ - Include informative error messages
139
+ - Example:
140
+ ```python
141
+ try:
142
+ result = api.request(endpoint)
143
+ except requests.RequestException as e:
144
+ raise APIError(f"Failed to fetch {endpoint}: {e}") from e
145
+ ```
146
+
147
+ ### Testing Guidelines
148
+
149
+ - Test files: `tests/test_*.py`
150
+ - Test classes: `Test*` (e.g., `TestGitHubApi`)
151
+ - Test methods: `test_*` (e.g., `test_fetch_projects`)
152
+ - Use the `BaseTest` class from `tests/basetest.py` for common functionality
153
+ - Include docstrings in tests explaining what is being tested
154
+
155
+ ### Type Hints
156
+
157
+ - Use type hints for function signatures
158
+ - Prefer `Optional[X]` over `X | None`
159
+ - Use `Any` sparingly
160
+ - Example:
161
+ ```python
162
+ def process_items(items: list[dict], filter_key: str) -> Optional[list[str]]:
163
+ """Process items and return filtered values."""
164
+ values = [item.get(filter_key) for item in items if filter_key in item]
165
+ return values if values else None
166
+ ```
167
+
168
+ ## Version Bumping
169
+
170
+ The package version is stored in `osprojects/__init__.py` and mirrored in `osprojects/version.py`.
171
+
172
+ To bump the version:
173
+
174
+ 1. Edit `osprojects/__init__.py` and update `__version__`
175
+ 2. Edit `osprojects/version.py` and update the `updated` field in the `Version` class (its `version` field is read from `osprojects/__init__.py` automatically)
176
+
177
+ ```python
178
+ # osprojects/__init__.py
179
+ __version__ = "0.5.1"
180
+
181
+ # osprojects/version.py — Version class fields to update:
182
+ version = osprojects.__version__ # reads from __init__.py automatically
183
+ updated = "2026-02-22" # set to today's date
184
+ ```
185
+
186
+ Then run formatters, tests, commit and push (or use `scripts/release`).
187
+
188
+ ## Git Workflow
189
+
190
+ 1. Create a feature branch
191
+ 2. Make changes following the code style guidelines
192
+ 3. Run formatters: `./scripts/blackisort`
193
+ 4. Run tests: `./scripts/test`
194
+ 5. Commit with a descriptive message
195
+ 6. Push and create a pull request
196
+
197
+ ## Release
198
+
199
+ ```bash
200
+ # Builds docs and commits + pushes
201
+ ./scripts/release
202
+ ```
203
+
204
+ ## CLI Commands
205
+
206
+ The package provides these CLI entry points:
207
+
208
+ - `checkos`: Main CLI for checking open source projects
209
+ - `issue2ticket`: Convert issues to tickets
210
+ - `gitlog2wiki`: Convert git log to wiki format
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyOpenSourceProjects
3
- Version: 0.4.1
4
- Dynamic: Summary
3
+ Version: 0.5.1
4
+ Summary: Check python OpenSource Projects to follow standards for README, github workflow and pyproject.toml - creates badges
5
5
  Project-URL: Home, https://github.com/WolfgangFahl/pyOpenSourceProjects
6
6
  Project-URL: Documentation, http://wiki.bitplan.com/index.php/pyOpenSourceProjects
7
7
  Project-URL: Source, https://github.com/WolfgangFahl/pyOpenSourceProjects
@@ -10,17 +10,21 @@ Maintainer-email: Wolfgang Fahl <wf@bitplan.com>
10
10
  License-Expression: Apache-2.0
11
11
  License-File: LICENSE
12
12
  Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3.10
13
14
  Classifier: Programming Language :: Python :: 3.11
14
15
  Classifier: Programming Language :: Python :: 3.12
15
16
  Classifier: Programming Language :: Python :: 3.13
16
- Requires-Python: >=3.11
17
+ Requires-Python: >=3.10
18
+ Requires-Dist: backoff
17
19
  Requires-Dist: beautifulsoup4>=4.14.2
18
20
  Requires-Dist: gitpython
19
21
  Requires-Dist: packaging>=24.1
20
22
  Requires-Dist: py-3rdparty-mediawiki>=0.18.1
21
23
  Requires-Dist: pylodstorage>=0.17.0
22
24
  Requires-Dist: python-dateutil>=2.8.2
25
+ Requires-Dist: ratelimit>=2.2.1
23
26
  Requires-Dist: requests
27
+ Requires-Dist: tomli>=2.3.0; python_version < '3.11'
24
28
  Requires-Dist: tqdm>=4.66.5
25
29
  Provides-Extra: test
26
30
  Description-Content-Type: text/markdown
@@ -0,0 +1 @@
1
+ __version__ = "0.5.1"
@@ -4,7 +4,7 @@
4
4
  """
5
5
 
6
6
  import os
7
- import tomllib
7
+ import sys
8
8
  from dataclasses import dataclass
9
9
  from typing import List
10
10
 
@@ -103,7 +103,7 @@ class CheckProject:
103
103
  owner = self.project.owner
104
104
  project_id = self.project.project_id
105
105
 
106
- markup= f"""| | |
106
+ markup = f"""| | |
107
107
  | :--- | :--- |
108
108
  | **PyPi** | [![PyPI Status](https://img.shields.io/pypi/v/{project_name}.svg)](https://pypi.python.org/pypi/{project_name}/) [![License](https://img.shields.io/github/license/{owner}/{project_id}.svg)](https://www.apache.org/licenses/LICENSE-2.0) [![pypi](https://img.shields.io/pypi/pyversions/{project_name})](https://pypi.org/project/{project_name}/) [![format](https://img.shields.io/pypi/format/{project_name})](https://pypi.org/project/{project_name}/) [![downloads](https://img.shields.io/pypi/dd/{project_name})](https://pypi.org/project/{project_name}/) |
109
109
  | **GitHub** | [![Github Actions Build](https://github.com/{owner}/{project_id}/actions/workflows/build.yml/badge.svg)](https://github.com/{owner}/{project_id}/actions/workflows/build.yml) [![Release](https://img.shields.io/github/v/release/{owner}/{project_id})](https://github.com/{owner}/{project_id}/releases) [![Contributors](https://img.shields.io/github/contributors/{owner}/{project_id})](https://github.com/{owner}/{project_id}/graphs/contributors) [![Last Commit](https://img.shields.io/github/last-commit/{owner}/{project_id})](https://github.com/{owner}/{project_id}/commits/) [![GitHub issues](https://img.shields.io/github/issues/{owner}/{project_id}.svg)](https://github.com/{owner}/{project_id}/issues) [![GitHub closed issues](https://img.shields.io/github/issues-closed/{owner}/{project_id}.svg)](https://github.com/{owner}/{project_id}/issues/?q=is%3Aissue+is%3Aclosed) |
@@ -111,8 +111,6 @@ class CheckProject:
111
111
  | **Docs** | [![API Docs](https://img.shields.io/badge/API-Documentation-blue)](https://{owner}.github.io/{project_id}/) [![formatter-docformatter](https://img.shields.io/badge/%20formatter-docformatter-fedcba.svg)](https://github.com/PyCQA/docformatter) [![style-google](https://img.shields.io/badge/%20style-google-3666d6.svg)](https://google.github.io/styleguide/pyguide.html#s3.8-comments-and-docstrings) |"""
112
112
  return markup
113
113
 
114
-
115
-
116
114
  def check_local(self) -> Check:
117
115
  local = Check.file_exists(self.project_path)
118
116
  return local
@@ -140,7 +138,7 @@ class CheckProject:
140
138
  msg=f"{min_python_version_minor} (build.yml)!={self.min_python_version_minor} (pyprojec.toml)",
141
139
  path=file_path,
142
140
  )
143
- python_versions = f"""python-version: [ {', '.join([f"'3.{i}'" for i in range(self.min_python_version_minor, self.max_python_version_minor+1)])} ]"""
141
+ python_versions = f"""python-version: [ {", ".join([f"'3.{i}'" for i in range(self.min_python_version_minor, self.max_python_version_minor + 1)])} ]"""
144
142
  self.add_content_check(
145
143
  content,
146
144
  python_versions,
@@ -152,11 +150,11 @@ class CheckProject:
152
150
  file_path,
153
151
  )
154
152
  self.add_content_check(
155
- content, "uses: actions/checkout@v4", file_path
153
+ content, "uses: actions/checkout@v6", file_path
156
154
  )
157
155
  self.add_content_check(
158
156
  content,
159
- "uses: actions/setup-python@v5",
157
+ "uses: actions/setup-python@v6",
160
158
  file_path,
161
159
  )
162
160
 
@@ -173,11 +171,11 @@ class CheckProject:
173
171
  elif file == "upload-to-pypi.yml":
174
172
  self.add_content_check(content, "id-token: write", file_path)
175
173
  self.add_content_check(
176
- content, "uses: actions/checkout@v4", file_path
174
+ content, "uses: actions/checkout@v6", file_path
177
175
  )
178
176
  self.add_content_check(
179
177
  content,
180
- "uses: actions/setup-python@v5",
178
+ "uses: actions/setup-python@v6",
181
179
  file_path,
182
180
  )
183
181
  self.add_content_check(
@@ -240,13 +238,13 @@ class CheckProject:
240
238
  readme_content, "readthedocs", readme_path, negative=True
241
239
  )
242
240
 
243
- def check_pyproject_toml(self) -> bool:
244
- """pyproject.toml."""
241
+ def check_pyproject_toml_vialib(self, toml_module) -> bool:
242
+ """Check pyproject.toml using the given toml_module."""
245
243
  toml_path = os.path.join(self.project_path, "pyproject.toml")
246
244
  toml_exists = self.add_path_check(toml_path)
247
245
  if toml_exists.ok:
248
246
  content = toml_exists.content
249
- toml_dict = tomllib.loads(content)
247
+ toml_dict = toml_module.loads(content)
250
248
  project_check = self.add_check(
251
249
  "project" in toml_dict, "[project]", toml_path
252
250
  )
@@ -281,6 +279,25 @@ class CheckProject:
281
279
  )
282
280
  return toml_exists.ok
283
281
 
282
+ def check_pyproject_toml_py311(self) -> bool:
283
+ """Python 3.11+ implementation (uses stdlib tomllib)."""
284
+ import tomllib
285
+
286
+ return self.check_pyproject_toml_vialib(tomllib)
287
+
288
+ def check_pyproject_toml_py310(self) -> bool:
289
+ """Python 3.10 implementation (uses third-party tomli)."""
290
+ import tomli as tomllib # @UnresolvedImport
291
+
292
+ return self.check_pyproject_toml_vialib(tomllib)
293
+
294
+ def check_pyproject_toml(self) -> bool:
295
+ """Delegator that picks the correct implementation based on Python
296
+ version."""
297
+ if sys.version_info >= (3, 11):
298
+ return self.check_pyproject_toml_py311()
299
+ return self.check_pyproject_toml_py310()
300
+
284
301
  def check_git(self) -> bool:
285
302
  """Check git repository information using GitHub class.
286
303
 
@@ -3,6 +3,7 @@
3
3
 
4
4
  @author: wf
5
5
  """
6
+
6
7
  import argparse
7
8
  import logging
8
9
  import os
@@ -29,7 +30,11 @@ class CheckOS:
29
30
 
30
31
  @classmethod
31
32
  def from_args(cls, args: Namespace):
32
- osprojects = OsProjects.from_folder(args.workspace, with_progress=True)
33
+ # Optimize: if --project and --local are both specified, pass project_id to avoid scanning all owners
34
+ project_id = args.project if (args.project and args.local) else None
35
+ osprojects = OsProjects.from_folder(
36
+ args.workspace, with_progress=True, project_id=project_id
37
+ )
33
38
  return cls(args, osprojects)
34
39
 
35
40
  def select_projects(self):
@@ -77,7 +82,6 @@ class CheckOS:
77
82
  if self.args.badges:
78
83
  print(checker.generate_badge_markdown())
79
84
 
80
-
81
85
  def handle_exception(self, ex: Exception):
82
86
  CheckOS.show_exception(ex, self.args.debug)
83
87
 
@@ -1,6 +1,14 @@
1
1
  """Created on 2024-08-27.
2
2
 
3
3
  @author: wf
4
+
5
+ Request limits
6
+ see https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28
7
+
8
+ Authenticated Requests (Core API): 5,000 requests per hour.
9
+ Unauthenticated Requests (Core API): 60 requests per hour.
10
+ Search API (Authenticated): 30 requests per minute.
11
+ Search API (Unauthenticated): 10 requests per minute.
4
12
  """
5
13
 
6
14
  import json
@@ -8,10 +16,14 @@ import os
8
16
  import re
9
17
  import time
10
18
  from dataclasses import dataclass, field
11
- from typing import Dict, List
19
+ from datetime import datetime
20
+ from typing import Dict, List, Optional
12
21
  from urllib.parse import urlparse
13
22
 
14
23
  import requests
24
+ from backoff import expo, on_exception
25
+ from basemkit.yamlable import lod_storable
26
+ from ratelimit import RateLimitException, limits
15
27
 
16
28
 
17
29
  class GitHubApi:
@@ -31,8 +43,8 @@ class GitHubApi:
31
43
 
32
44
  def __init__(self):
33
45
  """constructor."""
34
- home_dir = os.path.expanduser("~")
35
- self.base_dir = os.path.join(home_dir, ".github")
46
+ self.home_dir = os.path.expanduser("~")
47
+ self.base_dir = os.path.join(self.home_dir, ".github")
36
48
  os.makedirs(self.base_dir, exist_ok=True)
37
49
  self.cache_dir = os.path.join(self.base_dir, "cache")
38
50
  os.makedirs(self.cache_dir, exist_ok=True)
@@ -44,21 +56,28 @@ class GitHubApi:
44
56
  )
45
57
  self.api_url = "https://api.github.com"
46
58
 
59
+ def get_cache_path(self, file_name: str):
60
+ """Get the cache path for the given file_name."""
61
+ cache_path = os.path.join(self.cache_dir, file_name)
62
+ return cache_path
63
+
47
64
  def load_access_token(self) -> str:
48
65
  """If $HOME/.github/access_token.json exists read the token from
49
66
  there."""
50
67
  # Specify the path to the access token file
51
68
  token_file_path = os.path.join(self.base_dir, "access_token.json")
52
-
69
+ token = None
53
70
  # Check if the file exists and read the token
54
71
  if os.path.exists(token_file_path):
55
72
  with open(token_file_path, "r") as token_file:
56
73
  token_data = json.load(token_file)
57
- return token_data.get("access_token")
74
+ token = token_data.get("access_token")
58
75
 
59
76
  # Return None if no token file is found
60
- return None
77
+ return token
61
78
 
79
+ @on_exception(expo, RateLimitException, max_tries=8)
80
+ @limits(calls=5000, period=3600)
62
81
  def get_response(self, title: str, url: str, params={}, allow_redirects=True):
63
82
  """Get response from GitHub API or Google Docs API.
64
83
 
@@ -83,6 +102,9 @@ class GitHubApi:
83
102
  err_msg = (
84
103
  f"Failed to {title} for {url}: {response.status_code} - {response.text}"
85
104
  )
105
+ # Raise RateLimitException explicitly if server returns 429 or 403 to trigger backoff
106
+ if response.status_code in [403, 429]:
107
+ raise RateLimitException(err_msg, period_remaining=60)
86
108
  raise Exception(err_msg)
87
109
 
88
110
  return response
@@ -245,6 +267,121 @@ class GitHubRepo:
245
267
  return all_issues_records
246
268
 
247
269
 
270
+ @lod_storable
271
+ class GitHubFile:
272
+ """A single Github file."""
273
+
274
+ repo_name: str
275
+ path: str
276
+ sha: str
277
+ html_url: str
278
+ # Note: Code Search API does not return a date.
279
+ # We add this field here so it can be populated later if we do specific lookups.
280
+ created_at: Optional[datetime] = None
281
+
282
+ @property
283
+ def month_key(self) -> str:
284
+ """Returns 'YYYY-MM', or None if created_at is not set."""
285
+ month_key = None
286
+ if self.created_at:
287
+ month_key = self.created_at.strftime("%Y-%m")
288
+ return month_key
289
+
290
+
291
+ @lod_storable
292
+ class GitHubFileSet:
293
+ """A set of GitHubFiles."""
294
+
295
+ # map files by sha
296
+ files: Dict[str, GitHubFile] = field(default_factory=dict)
297
+ _sha_set: set = field(default_factory=set)
298
+
299
+ def add(self, api_item: dict) -> Optional[GitHubFile]:
300
+ """Parses a raw API item and adds it to cache if unique.
301
+
302
+ Returns the item if added, None if duplicate.
303
+ """
304
+ gh_file = None
305
+ sha = api_item.get("sha")
306
+
307
+ # Deduplication check
308
+ if sha and not sha in self._sha_set:
309
+
310
+ # Extract fields
311
+ repo_info = api_item.get("repository", {})
312
+
313
+ # Attempt to find date (usually not in search/code results, but parsing if present)
314
+ date_obj = None
315
+ raw_date = repo_info.get("created_at")
316
+ if raw_date:
317
+ try:
318
+ date_obj = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
319
+ except ValueError:
320
+ pass
321
+
322
+ # Create the instance
323
+ gh_file = GitHubFile(
324
+ repo_name=repo_info.get("full_name", "unknown/repo"),
325
+ path=api_item.get("path", ""),
326
+ sha=sha,
327
+ html_url=api_item.get("html_url", ""),
328
+ created_at=date_obj,
329
+ )
330
+
331
+ # Store in dict
332
+ self.files[sha] = gh_file
333
+ self._sha_set.add(sha)
334
+
335
+ return gh_file
336
+
337
+ @classmethod
338
+ def from_query(
339
+ cls, query: str, limit: int = 1000, verbose: bool = False
340
+ ) -> "GitHubFileSet":
341
+ """Factory function to query GitHub Code Search and populate a
342
+ GitHubFileSet.
343
+
344
+ Handles pagination and rate limiting.
345
+ """
346
+ github_api = GitHubApi.get_instance()
347
+ file_set = GitHubFileSet()
348
+ per_page = 100
349
+
350
+ # Calculate max pages (API usually limits code search results to 1000)
351
+ max_pages = (limit // per_page) + 1
352
+
353
+ if verbose:
354
+ print(f"Searching up to {limit} files for query: {query}")
355
+
356
+ for page in range(1, max_pages + 1):
357
+ if len(file_set.files) >= limit:
358
+ break
359
+
360
+ url = "https://api.github.com/search/code"
361
+ params = {"q": query, "per_page": per_page, "page": page}
362
+
363
+ try:
364
+ # Using the existing github_api instance passed in
365
+ response = github_api.get_response("search code", url, params=params)
366
+ search_data = response.json()
367
+
368
+ items = search_data.get("items", [])
369
+ if not items:
370
+ break
371
+
372
+ for item in items:
373
+ file_set.add(item)
374
+
375
+ # Sleep to respect rate limits
376
+ time.sleep(2.0)
377
+
378
+ except Exception as e:
379
+ print(f"Error fetching page {page}: {e}")
380
+ break
381
+
382
+ return file_set
383
+
384
+
248
385
  @dataclass
249
386
  class GitHubAction:
250
387
  """Represents a GitHub Action with its identifying information and log
@@ -227,12 +227,18 @@ class OsProjects:
227
227
  return url
228
228
 
229
229
  @classmethod
230
- def from_folder(cls, folder_path: str, with_progress: bool = False) -> "OsProjects":
230
+ def from_folder(
231
+ cls,
232
+ folder_path: str,
233
+ with_progress: bool = False,
234
+ project_id: Optional[str] = None,
235
+ ) -> "OsProjects":
231
236
  """Collect all github projects from the given folders.
232
237
 
233
238
  Args:
234
239
  folder_path (str): The path to the folder containing projects.
235
240
  with_progress (bool): Whether to display a progress bar. Defaults to True.
241
+ project_id (Optional[str]): If specified, optimize for finding this specific project.
236
242
 
237
243
  Returns:
238
244
  OsProjects: An instance of OsProjects with collected projects.
@@ -240,16 +246,37 @@ class OsProjects:
240
246
  osp = cls()
241
247
  owners, repos_by_folder = cls.github_repos_of_folder(folder_path)
242
248
 
249
+ # Optimization: If a specific project_id is requested, only fetch data for relevant owners
250
+ if project_id:
251
+ # Find which owner(s) have this project locally
252
+ relevant_owners = set()
253
+ for folder, repo in repos_by_folder.items():
254
+ if repo.project_id == project_id:
255
+ relevant_owners.add(repo.owner)
256
+
257
+ # Only process relevant owners (no need for progress bar when optimized)
258
+ if relevant_owners:
259
+ owners_to_process = relevant_owners
260
+ # Don't show progress bar for optimized single-project lookup
261
+ with_progress = False
262
+ else:
263
+ # Project not found locally, fall back to processing all owners
264
+ owners_to_process = owners
265
+ else:
266
+ owners_to_process = owners
267
+
243
268
  def process_owners(owners_iterable: Iterable[str]):
244
269
  for owner in owners_iterable:
245
270
  osp.add_projects_of_owner(owner)
246
271
 
247
272
  if with_progress:
248
- process_owners(tqdm(owners, desc="Processing owners"))
273
+ process_owners(tqdm(owners_to_process, desc="Processing owners"))
249
274
  else:
250
- process_owners(owners)
275
+ process_owners(owners_to_process)
251
276
 
252
277
  for folder, repo in repos_by_folder.items():
278
+ if project_id and repo.project_id != project_id:
279
+ continue
253
280
  project_url = repo.projectUrl()
254
281
  if project_url not in osp.projects_by_url:
255
282
  logging.warning(f"{project_url} not found in projects_by_url")
@@ -321,7 +348,6 @@ class OsProject:
321
348
  return repo
322
349
 
323
350
  def getIssues(self, limit: int = None, **params) -> List[Ticket]:
324
-
325
351
  # Fetch the raw issue records using the new getIssueRecords method
326
352
  issue_records = self.repo.getIssueRecords(limit=limit, **params)
327
353
 
@@ -376,7 +402,7 @@ class OsProject:
376
402
  def getComments(self, issue_number: int) -> List[dict]:
377
403
  """Fetch all comments for a specific issue number from GitHub."""
378
404
  comments_url = self.commentUrl(issue_number)
379
- gihub_api=GitHubApi.get_instance()
405
+ gihub_api = GitHubApi.get_instance()
380
406
  response = gihub_api.get_response("fetch comments", comments_url)
381
407
  return response.json()
382
408
 
@@ -0,0 +1,16 @@
1
+ """Created on 2024-07-31.
2
+
3
+ @author: wf
4
+ """
5
+
6
+ import osprojects
7
+
8
+
9
+ class Version(object):
10
+ """Version handling for pyOpenSourceProjects."""
11
+
12
+ name = "pyOpenSourceProjects"
13
+ version = osprojects.__version__
14
+ date = "2024-07-31"
15
+ updated = "2026-03-14" # keeping date; version now aligns to 0.5.1
16
+ description = "Check python OpenSource Projects to follow standards for README, github workflow and pyproject.toml - creates badges"
@@ -4,6 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "pyOpenSourceProjects"
7
+ description ="Check python OpenSource Projects to follow standards for README, github workflow and pyproject.toml - creates badges"
7
8
 
8
9
  authors = [
9
10
  { name = "Wolfgang Fahl", email = "wf@bitplan.com" }
@@ -29,18 +30,25 @@ dependencies = [
29
30
  # https://github.com/pypa/packaging
30
31
  "packaging>=24.1",
31
32
  # https://pypi.org/project/tqdm/
32
- "tqdm>=4.66.5"
33
+ "tqdm>=4.66.5",
34
+ # https://pypi.org/project/ratelimit/
35
+ "ratelimit>=2.2.1",
36
+ # https://pypi.org/project/backoff/
37
+ "backoff",
38
+ # https://github.com/hukkin/tomli
39
+ "tomli>=2.3.0; python_version < '3.11'",
33
40
  ]
34
- requires-python = ">=3.11"
41
+ requires-python = ">=3.10"
35
42
 
36
43
  classifiers = [
37
44
  "Programming Language :: Python",
45
+ "Programming Language :: Python :: 3.10",
38
46
  "Programming Language :: Python :: 3.11",
39
47
  "Programming Language :: Python :: 3.12",
40
48
  "Programming Language :: Python :: 3.13"
41
49
  ]
42
50
 
43
- dynamic = ["version", "description"]
51
+ dynamic = ["version"]
44
52
 
45
53
  [project.urls]
46
54
  Home = "https://github.com/WolfgangFahl/pyOpenSourceProjects"
@@ -3,10 +3,11 @@
3
3
  @author: wf
4
4
  """
5
5
 
6
+ import os
6
7
  import time
7
8
  import unittest
8
9
 
9
- from osprojects.github_api import GitHubAction, GitHubApi
10
+ from osprojects.github_api import GitHubAction, GitHubApi, GitHubFileSet
10
11
  from tests.basetest import BaseTest
11
12
 
12
13
 
@@ -39,6 +40,47 @@ class TestGitHubApi(BaseTest):
39
40
  repos[0], repos[trial], f"Cache was not used for {owner}"
40
41
  )
41
42
 
43
+ @unittest.skipIf(
44
+ BaseTest.inPublicCI(), "Must be authenticated to access the code search API"
45
+ )
46
+ def test_github_cff(self):
47
+ """Retrieves cff files via untargeted search."""
48
+ limit = 100 if self.inPublicCI() else 1000
49
+ debug = self.debug
50
+ debug = True
51
+ verbose = True
52
+ github_api = GitHubApi.get_instance()
53
+ yaml_file = github_api.get_cache_path(f"git_cff_fileset{limit}.yaml")
54
+ query = "filename:CITATION.cff"
55
+
56
+ file_set = None
57
+
58
+ # Check if cache exists
59
+ if os.path.isfile(yaml_file):
60
+ print(f"Loading cached file set from: {yaml_file}")
61
+ # Ensure your class has a load_from_yaml_file or similar method (standard for lod_storable)
62
+ file_set = GitHubFileSet.load_from_yaml_file(
63
+ yaml_file
64
+ ) # @UndefinedVariable
65
+ else:
66
+ print(f"Cache not found. Querying GitHub API: {query}")
67
+ try:
68
+ # Limit max_results if your code supports it to prevent future 403s on fresh runs
69
+ file_set = GitHubFileSet.from_query(query, verbose=verbose)
70
+ file_set.save_to_yaml_file(yaml_file)
71
+ except Exception as e:
72
+ print(f"Warning: API fetch failed or was incomplete: {e}")
73
+ # If the file was partially created/in-memory before crash,
74
+ # you might handle partial saves here, but usually we just fail the test
75
+ # or rely on whatever logic wrote the file before the crash.
76
+
77
+ # Assertions to ensure we actually have data
78
+ self.assertIsNotNone(file_set)
79
+ # Check if we have files (based on your wc -l output, you expect ~1000)
80
+ if debug:
81
+ print(f"github CFF File references loaded: {len(file_set.files)}")
82
+ self.assertGreater(len(file_set.files), 0)
83
+
42
84
  @unittest.skipIf(BaseTest.inPublicCI(), "missing admin rights in public CI")
43
85
  def test_github_action_from_url(self):
44
86
  """Test creating GitHubAction instances from URLs."""
@@ -7,7 +7,6 @@ from argparse import Namespace
7
7
 
8
8
  from osprojects.check_project import CheckProject
9
9
  from osprojects.osproject import Commit, OsProject, Ticket, gitlog2wiki, main
10
-
11
10
  from tests.basetest import BaseTest
12
11
 
13
12
 
@@ -1 +0,0 @@
1
- __version__ = "0.4.1"