pysfi 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysfi-0.1.7.dist-info/METADATA +134 -0
- pysfi-0.1.7.dist-info/RECORD +31 -0
- pysfi-0.1.7.dist-info/WHEEL +4 -0
- pysfi-0.1.7.dist-info/entry_points.txt +15 -0
- sfi/__init__.py +3 -0
- sfi/alarmclock/__init__.py +0 -0
- sfi/alarmclock/alarmclock.py +367 -0
- sfi/bumpversion/__init__.py +3 -0
- sfi/bumpversion/bumpversion.py +535 -0
- sfi/cli.py +11 -0
- sfi/docscan/__init__.py +3 -0
- sfi/docscan/docscan.py +841 -0
- sfi/docscan/docscan_gui.py +596 -0
- sfi/embedinstall/__init__.py +0 -0
- sfi/embedinstall/embedinstall.py +418 -0
- sfi/filedate/__init__.py +0 -0
- sfi/filedate/filedate.py +112 -0
- sfi/makepython/__init__.py +0 -0
- sfi/makepython/makepython.py +326 -0
- sfi/pdfsplit/__init__.py +0 -0
- sfi/pdfsplit/pdfsplit.py +173 -0
- sfi/projectparse/__init__.py +0 -0
- sfi/projectparse/projectparse.py +152 -0
- sfi/pyloadergen/__init__.py +0 -0
- sfi/pyloadergen/pyloadergen.py +995 -0
- sfi/pypacker/__init__.py +0 -0
- sfi/pypacker/fspacker.py +91 -0
- sfi/taskkill/__init__.py +0 -0
- sfi/taskkill/taskkill.py +236 -0
- sfi/which/__init__.py +0 -0
- sfi/which/which.py +74 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Callable
|
|
13
|
+
|
|
14
|
+
if sys.version_info >= (3, 11):
|
|
15
|
+
import tomllib
|
|
16
|
+
else:
|
|
17
|
+
import tomli as tomllib # type: ignore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Platform flags; used to choose OS-specific persistence (`setx` vs shell rc files).
is_windows = sys.platform == "win32"
is_linux = sys.platform == "linux"
is_macos = sys.platform == "darwin"

# Plain-message logging so CLI output reads like normal tool output.
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
cwd = Path.cwd()  # directory the tool operates on

# Build front-ends probed on PATH (in this order) when pyproject.toml gives no hint.
_BUILD_COMMANDS = ["uv", "poetry", "hatch"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_pyproject_toml(directory: Path) -> dict:
|
|
32
|
+
"""Parse pyproject.toml file in directory and return project data."""
|
|
33
|
+
project_toml = directory / "pyproject.toml"
|
|
34
|
+
if not project_toml.is_file():
|
|
35
|
+
logger.error(f"No pyproject.toml found in {directory}")
|
|
36
|
+
return {}
|
|
37
|
+
|
|
38
|
+
with project_toml.open("rb") as f:
|
|
39
|
+
return tomllib.load(f)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _get_build_command_from_toml(directory: Path) -> str | None:
    """Infer the build front-end (poetry/hatch) from pyproject.toml's build-backend."""
    logger.debug(f"Parsing pyproject.toml in {directory}")

    project_data = parse_pyproject_toml(directory)
    if not project_data:
        return None

    build_backend = project_data.get("build-system", {}).get("build-backend")
    if build_backend is not None:
        if build_backend.startswith("poetry."):
            return "poetry"
        if build_backend.startswith("hatchling."):
            return "hatch"
        logger.error(f"Unknown build-backend: {build_backend}")
        return None

    logger.error("No `build-system` or `build-backend` found in pyproject.toml: ")
    logger.error(json.dumps(project_data, indent=2, ensure_ascii=False, sort_keys=True))
    return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _get_build_command(directory: Path):
    """Resolve the build front-end for `directory`.

    Prefers the backend declared in pyproject.toml; otherwise probes PATH for
    the tools in _BUILD_COMMANDS. Exits the process when nothing is found.
    """
    project_path = directory / "pyproject.toml"
    if project_path.is_file():
        logger.debug(f"Found pyproject.toml in {directory}")
        return _get_build_command_from_toml(directory)

    for command in _BUILD_COMMANDS:
        if shutil.which(command):
            logger.debug(f"Found build command: {command}")
            return command
    logger.error(f"No build command found in {directory}")
    sys.exit(1)
    # (removed the unreachable `return None` that followed sys.exit)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class Command:
    """A CLI sub-command descriptor.

    `cmds` is either an argv list (run via _run_command), a callable that
    main() invokes with no arguments, or None when there is no preset action.
    """

    name: str  # full command name, e.g. "build"
    alias: str  # short alias, e.g. "b"
    cmds: list[str] | Callable[..., Any] | None = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _clean(root_dir: Path):
|
|
91
|
+
_run_command(["rm", "-rf", "dist", "build", "*.egg-info"], root_dir)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Resolved once at import time; "" when detection fails (main() re-resolves).
BUILD_COMMAND = _get_build_command(cwd) or ""
_COMMANDS = [
    Command(name="build", alias="b", cmds=[BUILD_COMMAND, "build"]),
    Command(name="bumpversion", alias="bump", cmds=["bumpversion", "patch"]),
    # BUG FIX: main() invokes callable cmds with no arguments, but _clean
    # requires a root directory — bind cwd here instead of passing _clean raw,
    # which raised TypeError whenever the `clean` command was used.
    Command(name="clean", alias="c", cmds=lambda: _clean(cwd)),
    Command(name="publish", alias="p"),  # No preset commands
    Command(name="test", alias="t", cmds=lambda: os.system("pytest")),
    Command(name="test-benchmark", alias="tb", cmds=lambda: os.system("pytest -m benchmark")),
    Command(name="test-coverage", alias="tc", cmds=lambda: os.system("pytest --cov=sfi")),
    Command(name="token", alias="tk", cmds=lambda: _set_token(BUILD_COMMAND)),
]
# Lookup table keyed by both full name and alias.
_COMMAND_DICT = {command.name: command for command in _COMMANDS}
_COMMAND_DICT.update({command.alias: command for command in _COMMANDS})
# argparse choices: aliases first, then full names.
_CHOICES = [command.alias for command in _COMMANDS]
_CHOICES.extend([command.name for command in _COMMANDS])
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def main():
    """CLI entry point: dispatch one sub-command (build/bump/clean/publish/test/token)."""
    parser = argparse.ArgumentParser(description="Make Python")
    parser.add_argument("command", type=str, choices=_CHOICES, help=f"Command to run, options: {_CHOICES}")
    parser.add_argument("--debug", "-d", action="store_true", help="Enable debug mode")

    args = parser.parse_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)

    # Re-resolve at run time (module-level BUILD_COMMAND was fixed at import).
    build_command = _get_build_command(cwd) or ""
    logger.info(f"Using build command: {build_command}")
    command = _COMMAND_DICT.get(args.command)
    if command:
        # cmds may be a zero-arg callable, an argv list, or None (no preset).
        if callable(command.cmds):
            command.cmds()
        elif isinstance(command.cmds, list):
            _run_command(command.cmds, cwd)
        else:
            logger.debug("No preset commands found")
    else:
        # Unreachable in practice: argparse already restricts to _CHOICES.
        logger.error(f"Unknown command: {args.command}")
        sys.exit(1)

    # Publishing additionally ensures a PyPI token is configured first.
    if args.command in {"publish", "p"}:
        if not _check_pypi_token(build_command):
            _set_token(build_command)
        _run_command([build_command, "publish"], cwd)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _set_token(build_command: str, show_header: bool = True) -> None:
    """Prompt for a PyPI token and store it for the given build front-end."""
    if show_header:
        logger.info(f"Setting PyPI token for {build_command}...")

    if build_command.lower() not in _BUILD_COMMANDS:
        logger.error(f"Unknown build command: {build_command}")
        logger.error(f"Please use `{'/'.join(_BUILD_COMMANDS)}`")
        sys.exit(1)

    token = input("Enter your PyPI token (leave empty to cancel): ").strip()
    if not token:
        logger.info("Invalid token, cancelled.")
        return

    # Dispatch to the tool-specific setter.
    setters = {
        "uv": _set_uv_token,
        "poetry": _set_poetry_token,
        "hatch": _set_hatch_token,
    }
    setter = setters.get(build_command)
    if setter is None:
        logger.error(f"Unknown build command: {build_command}")
        sys.exit(1)
    setter(token)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _set_uv_token(token: str) -> None:
    """Persist the PyPI token for uv (env var + ~/.config/uv/uv.toml).

    The config file is rewritten wholesale; warn when an existing file is
    about to be clobbered, and restrict the file to user-only access since
    it holds a credential in plain text.
    """
    _write_to_env_file("UV_PUBLISH_TOKEN", token)

    # Write to `uv.toml`
    config_path = Path.home() / ".config" / "uv" / "uv.toml"
    config_path.parent.mkdir(parents=True, exist_ok=True)
    if config_path.exists():
        # Entire file is replaced below; any other uv settings are lost.
        logger.warning(f"Overwriting existing {config_path}")
    content = f"""[publish]
token = "{token}"
"""
    config_path.write_text(content)
    config_path.chmod(0o600)  # credential file: user read/write only
    logger.info(f"Token saved to {config_path}")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _set_poetry_token(token: str) -> None:
    """Set PyPI token for poetry.

    Stores the token twice: as POETRY_PYPI_TOKEN_PYPI (persisted via the
    shell rc file or `setx`) and in Poetry's own configuration.
    """
    _write_to_env_file("POETRY_PYPI_TOKEN_PYPI", token)
    _run_command(["poetry", "config", "pypi-token.pypi", token], cwd)
    logger.info("Token saved to Poetry configuration.")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _set_hatch_token(token: str) -> None:
    """Set PyPI token for hatch by writing ~/.pypirc.

    Overwrites any existing ~/.pypirc and restricts the file to user-only
    access, since it stores the credential in plain text.
    """
    pypirc_path = Path.home() / ".pypirc"
    pypirc_content = f"""[pypi]
repository = https://upload.pypi.org/legacy/
username = __token__
password = {token}
"""
    pypirc_path.write_text(pypirc_content)
    pypirc_path.chmod(0o600)  # credential file: user read/write only
    logger.info(f"Token saved to {pypirc_path}")
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _check_pypi_token(build_command: str) -> bool:
    """Return True when a PyPI token appears to be configured for `build_command`.

    Checks environment variables and tool-specific config files. A True result
    only means *something* is configured, not that the token is valid.
    """
    logger.info("Checking PyPI token configuration...")

    token_found = False
    if build_command == "uv":
        # Check for uv publish token
        token_env_vars = ["UV_PUBLISH_TOKEN", "PYPI_API_TOKEN"]
        for var in token_env_vars:
            if os.getenv(var):
                logger.info(f"Found PyPI token in environment variable: {var}")
                token_found = True
                break

        # Check for config file
        config_path = Path.home() / ".config" / "uv" / "uv.toml"
        if config_path.exists():
            logger.info(f"Found uv config file: {config_path}")
            token_found = True

    elif build_command == "poetry":
        # Check for poetry token
        if os.getenv("POETRY_PYPI_TOKEN_PYPI"):
            logger.info("Found PyPI token in POETRY_PYPI_TOKEN_PYPI environment variable")
            token_found = True

        # Check for poetry config. BUG FIX: poetry may have been selected via
        # pyproject.toml without being installed; guard against FileNotFoundError
        # instead of crashing the publish flow.
        try:
            result = subprocess.run(
                ["poetry", "config", "pypi-token.pypi"],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            logger.warning("poetry executable not found; skipping poetry config check")
            result = None
        if result is not None and result.stdout.strip() and result.stdout.strip() != "None":
            logger.info("Found PyPI token in Poetry configuration")
            token_found = True

    elif build_command == "hatch":
        # Check for .pypirc (presence only; contents are not validated)
        pypirc_path = Path.home() / ".pypirc"
        if pypirc_path.exists():
            logger.info(f"Found .pypirc file: {pypirc_path}")
            token_found = True

    return token_found
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _run_command(cmd: list[str], directory: Path) -> None:
    """Run `cmd` in `directory`, echo its output, and exit on failure.

    BUG FIX: the original caught subprocess.CalledProcessError, but
    subprocess.run never raises it without check=True, so failures were
    silently ignored. Check the return code explicitly and propagate it;
    also handle a missing executable cleanly.
    """
    logger.debug(f"Running command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, cwd=directory, capture_output=True, text=True)
    except FileNotFoundError:
        logger.error(f"Command not found: {cmd[0]}")
        sys.exit(1)

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    if result.returncode != 0:
        logger.error(f"Command failed with exit code {result.returncode}")
        sys.exit(result.returncode)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _write_to_env_file(key: str, value: str) -> None:
    """Persist key=value for future shells (`setx` on Windows, rc file elsewhere).

    BUG FIX: the original passed an args list together with shell=True, which
    is incorrect subprocess usage (on POSIX the list elements become shell
    arguments, not the command). `setx` needs no shell at all.
    """
    if is_windows:
        subprocess.run(["setx", key, value])
    else:
        _write_to_shell_config(f"export {key}='{value}'")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _get_shell_config_path() -> Path:
|
|
269
|
+
"""Get the appropriate shell config file based on the current shell."""
|
|
270
|
+
# Try to detect the shell
|
|
271
|
+
shell = os.getenv("SHELL", "")
|
|
272
|
+
if "zsh" in shell:
|
|
273
|
+
return Path.home() / ".zshrc"
|
|
274
|
+
else:
|
|
275
|
+
# Default to .bashrc
|
|
276
|
+
return Path.home() / ".bashrc"
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _write_to_shell_config(content: str) -> None:
    """Write an `export VAR=value` line to the user's shell rc file,
    replacing any existing export of the same variable.

    The target file is the one returned by _get_shell_config_path()
    (~/.zshrc or ~/.bashrc), created if missing. `content` must contain at
    least one line of the form `export VARIABLE_NAME=value`; otherwise the
    call logs an error and writes nothing.
    """
    config_path = _get_shell_config_path()
    if not config_path.exists():
        logger.warning(f"{config_path} does not exist, creating it...")
        config_path.touch()

    # Extract the variable name from the export statement
    # Expected format: export VARIABLE_NAME=value
    var_name = None
    for line in content.strip().split("\n"):
        if line.startswith("export ") and "=" in line:
            var_name = line.split("=")[0].replace("export ", "").strip()
            break

    if not var_name:
        logger.error("Invalid export statement format. Expected: export VARIABLE_NAME=value")
        return

    # Read existing content
    existing_lines = config_path.read_text(encoding="utf-8").split("\n")

    # Find and remove existing export statements for this variable
    new_lines = []
    found_existing = False
    for line in existing_lines:
        # Check if this line exports the same variable (with or without a
        # space before `=`); the old line is dropped, not commented out.
        if line.strip().startswith(f"export {var_name}=") or line.strip().startswith(f"export {var_name} ="):
            found_existing = True
            continue
        new_lines.append(line)

    if found_existing:
        logger.info(f"Found existing export statement for {var_name}, replacing it...")

    # Add new content
    new_lines.append(content.strip())

    # Write back to file
    config_path.write_text("\n".join(new_lines), encoding="utf-8")

    logger.info(f"Content written to {config_path}")
    logger.info(f"Run `source {config_path}` to apply the changes")
    logger.info(f"Run `cat {config_path}` to view the content")
    logger.info(f"Run `cat {config_path} | grep 'export'` to view the exported variables")
|
sfi/pdfsplit/__init__.py
ADDED
|
File without changes
|
sfi/pdfsplit/pdfsplit.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import fitz
|
|
8
|
+
|
|
9
|
+
# Module-wide logging: plain-message format, INFO by default (-v raises to DEBUG).
logging.basicConfig(level=logging.INFO, format="%(message)s")
cwd = Path.cwd()  # default output directory for split files
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def parse_page_ranges(range_str: str, total_pages: int) -> list[int]:
    """Parse a page-range spec like '1,3-5,10-' into 1-indexed page numbers.

    An open start ('-3') begins at page 1; an open end ('8-') runs to
    `total_pages`. Duplicates are kept and order follows the spec.
    """
    pages: list[int] = []
    for token in (piece.strip() for piece in range_str.split(",")):
        if not token:
            continue
        if "-" not in token:
            pages.append(int(token))
            continue
        lo, _, hi = token.partition("-")
        first = int(lo) if lo else 1
        last = int(hi) if hi else total_pages
        pages.extend(range(first, last + 1))
    return pages
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def split_by_number(input_file: Path, output_file: Path, number: int):
    """Split the PDF into `number` parts of (nearly) equal page count.

    Output files are named `<stem>_partN<suffix>` next to `output_file`;
    the first `total % number` parts each get one extra page.
    """
    doc = fitz.open(input_file)
    total_pages = doc.page_count
    base_pages, remainder = divmod(total_pages, number)

    logger.debug(
        f"Total pages: {total_pages}, Splitting into {number} parts, {base_pages} base pages per part, {remainder} extra pages"
    )

    cursor = 0
    for index in range(number):
        # First 'remainder' parts get one extra page
        pages_here = base_pages + (1 if index < remainder else 0)

        if cursor >= total_pages:
            logger.debug(f"Skipping part {index + 1}: no more pages remaining")
            continue

        stop = min(cursor + pages_here, total_pages)

        part_file = output_file.parent / f"{output_file.stem}_part{index + 1}{output_file.suffix}"
        part_doc = fitz.open()
        for page in range(cursor, stop):
            part_doc.insert_pdf(doc, from_page=page, to_page=page)
        part_doc.save(part_file)
        part_doc.close()
        logger.info(f"Created part {index + 1}: {part_file} (pages {cursor + 1}-{stop})")

        cursor = stop

    doc.close()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def split_by_size(input_file: Path, output_file: Path, size: int):
    """Split the PDF into consecutive chunks of `size` pages each.

    Output files are named `<stem>_partN<suffix>` next to `output_file`;
    the final chunk may be shorter than `size`.
    """
    doc = fitz.open(input_file)
    total_pages = doc.page_count

    logger.debug(f"Total pages: {total_pages}, Splitting with {size} pages per part")

    part_index = 0
    first = 0
    while first < total_pages:
        last = min(first + size, total_pages)
        part_file = output_file.parent / f"{output_file.stem}_part{part_index + 1}{output_file.suffix}"
        chunk = fitz.open()
        for page in range(first, last):
            chunk.insert_pdf(doc, from_page=page, to_page=page)
        chunk.save(part_file)
        chunk.close()
        logger.info(f"Created part {part_index + 1}: {part_file} (pages {first + 1}-{last})")

        first = last
        part_index += 1

    doc.close()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def split_by_range(input_file: Path, output_file: Path, range_str: str):
    """Extract the pages named in `range_str` into a single new PDF."""
    doc = fitz.open(input_file)
    total_pages = doc.page_count

    selected = parse_page_ranges(range_str, total_pages)
    # Drop out-of-bounds entries and convert to 0-indexed page numbers.
    selected = [page - 1 for page in selected if 1 <= page <= total_pages]

    if not selected:
        logger.error("No valid pages found in the specified range")
        doc.close()
        return

    # De-duplicate; note this also sorts the pages into ascending order.
    selected = sorted(set(selected))

    logger.debug(f"Extracting pages: {[page + 1 for page in selected]}")

    out_doc = fitz.open()
    for page in selected:
        out_doc.insert_pdf(doc, from_page=page, to_page=page)

    out_doc.save(output_file)
    out_doc.close()
    doc.close()
    logger.info(f"Created output file: {output_file} ({len(selected)} pages)")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def main():
    """CLI entry point: split a PDF by part count (-n), chunk size (-s), or page range (-r)."""
    parser = argparse.ArgumentParser(description="Split PDF files")
    parser.add_argument("input", help="Input PDF file")
    parser.add_argument("output", nargs="?", help="Output PDF file (optional for -n and -s modes)")
    parser.add_argument("-o", "--output-dir", default=str(cwd), help="Output directory (default: current directory)")
    parser.add_argument("-f", "--output-format", help="Output file format pattern, e.g., 'split_{part:02d}.pdf'")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    # Split by number, size, or range
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-n", "--number", type=int, help="Number of splits")
    group.add_argument("-s", "--size", type=int, default=1, help="Size of each split in pages")
    group.add_argument("-r", "--range", type=str, help="Range of pages to extract, e.g., '1,2,4-10,15-20,25-'")

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    output_dir = Path(args.output_dir)
    if not output_dir.is_dir():
        logger.error(f"Output directory {args.output_dir} does not exist, please check the path.")
        return

    input_file = Path(args.input)
    if not input_file.is_file():
        logger.error(f"Input file {args.input} does not exist, please check the path.")
        return

    # For -n and -s modes, output is optional and defaults to base name with suffix
    # For -r mode, output is required
    if args.range and not args.output:
        logger.error("Output file is required for -r/--range mode")
        return

    if not args.range:
        output_file = output_dir / (input_file.stem + "_split.pdf") if not args.output else Path(args.output)
    else:
        output_file = Path(args.output)

    logger.info(f"Start splitting {input_file}")
    # BUG FIX: `--size` has default=1, so `args.size` is always truthy. The old
    # dispatch checked size before range, which meant `-r` was silently ignored
    # and a size-1 split ran instead. Check --range before the size fallback.
    if args.number:
        split_by_number(input_file, output_file, args.number)
    elif args.range:
        split_by_range(input_file, output_file, args.range)
    elif args.size:
        split_by_size(input_file, output_file, args.size)
    else:
        logger.error("Please specify either -n, -s, or -r")
        return
|
|
File without changes
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Parse pyproject.toml files in directory, supports multiple projects."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
if sys.version_info >= (3, 11):
|
|
13
|
+
import tomllib
|
|
14
|
+
else:
|
|
15
|
+
import tomli as tomllib # type: ignore
|
|
16
|
+
|
|
17
|
+
# Public API of this module.
__all__ = ["parse_project_data"]


# Plain-message logging; -d raises the level to DEBUG in main().
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
cwd = Path.cwd()  # base for the default --directory and the JSON output path
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_project_data(directory: Path, recursive: bool = False) -> dict:
    """Parse pyproject.toml file(s) under `directory` and return project data.

    Returns:
        dict: Mapping of project directory name to extracted project info
        ({} when no pyproject.toml was found).
    """
    raw = _parse_pyproject(directory, recursive=recursive)
    return _extract_project_info(raw) if raw else {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _parse_pyproject(directory: Path, recursive: bool = False) -> dict[str, dict]:
    """Load pyproject.toml file(s) and return raw TOML data keyed by project dir name."""
    data: dict[str, dict] = {}
    if recursive:
        # Every pyproject.toml anywhere under `directory`.
        for toml_file in directory.rglob("pyproject.toml"):
            with toml_file.open("rb") as fh:
                data[toml_file.parent.stem] = tomllib.load(fh)
    else:
        toml_file = directory / "pyproject.toml"
        if not toml_file.is_file():
            logger.error(f"No pyproject.toml found in {directory}")
            return {}

        with toml_file.open("rb") as fh:
            logger.debug(f"Parsing {toml_file}")
            data[toml_file.parent.stem] = tomllib.load(fh)

    logger.debug(f"Parsed {len(data)} pyproject.toml files, data: {data}")
    return data
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _extract_project_info(data: dict) -> dict:
    """Extract commonly used fields from raw pyproject data, per project."""
    if not data:
        logger.error("No data to extract")
        return {}

    info: dict = {}
    for project_key, raw in data.items():
        # Guard clause: projects without a [project] table get an empty entry.
        if "project" not in raw:
            logger.warning(f"No project information found in {project_key}")
            info.setdefault(project_key, {})
            continue

        project = raw.get("project", {})
        build_system = raw.get("build-system", {})
        info.setdefault(
            project_key,
            {
                "name": project.get("name"),
                "version": project.get("version"),
                "description": project.get("description"),
                "readme": project.get("readme"),
                "requires_python": project.get("requires-python"),
                "dependencies": project.get("dependencies", []),
                "optional_dependencies": project.get("optional-dependencies", {}),
                "scripts": project.get("scripts", {}),
                "entry_points": project.get("entry-points", {}),
                "authors": project.get("authors", []),
                "license": project.get("license"),
                "keywords": project.get("keywords", []),
                "classifiers": project.get("classifiers", []),
                "urls": project.get("urls", {}),
                "build_backend": build_system.get("build-backend"),
                "requires": build_system.get("requires", []),
            },
        )

    logger.debug(f"Extracted {len(info)} projects, info: {info}")
    return info
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _check_directory(directory: str) -> bool:
|
|
99
|
+
"""Check if directory is valid."""
|
|
100
|
+
if not directory:
|
|
101
|
+
logger.error("Error: No directory specified")
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
dir_path = Path(directory)
|
|
105
|
+
if not dir_path.is_dir():
|
|
106
|
+
logger.error(f"Error: {dir_path} is not a directory")
|
|
107
|
+
return False
|
|
108
|
+
|
|
109
|
+
return True
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def main():
    """CLI entry point: parse pyproject.toml data and cache it as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--directory", "-D", type=str, default=str(cwd), help="Directory to parse")
    parser.add_argument("--debug", "-d", action="store_true", help="Debug mode")
    parser.add_argument("--recursive", "-r", action="store_true", help="Recursively parse subdirectories")
    parser.add_argument("--show", "-s", action="store_true", help="Show parsed data")
    parser.add_argument("--output", "-o", type=str, default="projects.json", help="Output file path")

    args = parser.parse_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)

    if not _check_directory(args.directory):
        return

    # --show prefers a previously written JSON cache over re-parsing.
    output_path = (cwd / args.output).with_suffix(".json")
    if args.show:
        if output_path.is_file():
            logger.info(f"Loading output from `{output_path}`:")
            with output_path.open("r", encoding="utf-8") as f:
                output_data = json.load(f)
            logger.info(json.dumps(output_data, indent=2, ensure_ascii=False, sort_keys=True))
            return
        else:
            logger.debug(f"No json file found at {output_path}, continue parsing...")

    t0 = time.perf_counter()
    logger.info(f"Parsing pyproject.toml in {args.directory}")
    output_data = parse_project_data(Path(args.directory), recursive=args.recursive)
    # With --show (and no cache hit above), print instead of writing the file.
    if args.show:
        logger.info(json.dumps(output_data, indent=2, ensure_ascii=False, sort_keys=True))
        return

    try:
        with output_path.open("w", encoding="utf-8") as f:
            json.dump(output_data, f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error writing output to {output_path}: {e}")
        return
    else:
        logger.info(f"Output written to {output_path}, took {time.perf_counter() - t0:.4f}s")
|
|
File without changes
|