pysfi 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sfi/img2pdf/img2pdf.py ADDED
@@ -0,0 +1,453 @@
1
+ """Convert images in a directory to a single PDF file.
2
+
3
+ This module provides functionality to convert multiple image files in a directory
4
+ into a single PDF file. It supports various image formats and offers options
5
+ to normalize images (scale, rotate) before conversion.
6
+
7
+ Command: img2pdf [directory] [--dpi DPI] [--normalize | --no-normalize]
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import atexit
14
+ import concurrent.futures
15
+ import json
16
+ import logging
17
+ from dataclasses import dataclass
18
+ from functools import cached_property
19
+ from pathlib import Path
20
+
21
+ from PIL import Image
22
+ from PIL.Image import Resampling
23
+
24
# Configure the root logger at import time with INFO as the threshold.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the functions and classes in this file.
logger = logging.getLogger(__name__)


# JSON file in the user's home directory where the settings are persisted.
CONFIG_FILE = Path.home() / ".pysfi" / "img2pdf.json"
29
+
30
+
31
@dataclass
class ImageToPdfConfig:
    """Persistent settings for the image to PDF conversion.

    Built-in defaults are applied first and are then overridden by every
    valid value found in ``CONFIG_FILE``. Invalid or unknown entries in the
    file are ignored so that a damaged file never breaks start-up.

    Attributes:
        DPI: Default resolution in dots per inch.
        EXTENSIONS: Lower-case file suffixes (including the dot) that are
            accepted as images. ``None`` selects the built-in set.
    """

    DPI: int = 300
    EXTENSIONS: set[str] | None = None

    def __post_init__(self) -> None:
        """Fill in the default extensions, then apply the stored settings."""
        if self.EXTENSIONS is None:
            self.EXTENSIONS = {
                ".jpg",
                ".jpeg",
                ".png",
                ".gif",
                ".bmp",
                ".webp",
                ".tiff",
                ".ico",
            }
        self._load()

    def _load(self) -> None:
        """Override the current values with valid entries from CONFIG_FILE."""
        if not CONFIG_FILE.exists():
            return

        try:
            config_data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Warning: Could not load config from {CONFIG_FILE}")
            return

        if not isinstance(config_data, dict):
            print(f"Warning: Could not load config from {CONFIG_FILE}")
            return

        dpi = config_data.get("DPI")
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(dpi, int) and not isinstance(dpi, bool) and dpi > 0:
            self.DPI = dpi

        # JSON has no set type: save() stores the extensions as a list, so a
        # list (not a set) is what has to be accepted when reading them back.
        extensions = config_data.get("EXTENSIONS")
        if isinstance(extensions, list) and all(
            isinstance(item, str) for item in extensions
        ):
            # Suffixes are compared in lower case by the validation code.
            self.EXTENSIONS = {item.lower() for item in extensions}

    def save(self) -> None:
        """Save current configuration to file.

        The parent directory is created when missing. Extensions are written
        in sorted order so that the file content is stable between runs.

        Raises:
            OSError: If the directory or the file cannot be written.
        """
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        config_dict = {"DPI": self.DPI, "EXTENSIONS": sorted(self.EXTENSIONS)}
        CONFIG_FILE.write_text(json.dumps(config_dict, indent=4), encoding="utf-8")
73
+
74
+
75
# Shared settings instance; constructing it reads CONFIG_FILE when it exists.
conf = ImageToPdfConfig()
# Write the settings back to CONFIG_FILE when the interpreter exits.
atexit.register(conf.save)
77
+
78
+
79
# Magic numbers for image file headers, keyed by format name.
# Each value is a tuple because one format can have several valid prefixes
# (GIF87a/GIF89a, little-/big-endian TIFF). WebP is not listed here: its
# marker is split by a 4-byte size field, see _has_image_signature().
_MAGIC_NUMBERS: dict[str, tuple[bytes, ...]] = {
    "jpg": (b"\xff\xd8\xff",),
    "jpeg": (b"\xff\xd8\xff",),
    "png": (b"\x89PNG\r\n\x1a\n",),
    "gif": (b"GIF87a", b"GIF89a"),
    "bmp": (b"BM",),
    "tiff": (b"II*\x00", b"MM\x00*"),
    "ico": (b"\x00\x00\x01\x00",),
    "svg": (b"<svg",),
}


def _has_image_signature(header: bytes) -> bool:
    """Return True if *header* starts with a known image signature."""
    # bytes.startswith accepts a tuple of alternatives.
    if any(header.startswith(prefixes) for prefixes in _MAGIC_NUMBERS.values()):
        return True
    # WebP container: b"RIFF", 4-byte file size, then b"WEBP".
    return header[:4] == b"RIFF" and header[8:12] == b"WEBP"


def is_valid_image(file_path: Path) -> bool:
    """Validate image file.

    A file is accepted when it is readable and non-empty, its lower-cased
    suffix is listed in ``conf.EXTENSIONS`` and its first bytes match a known
    image signature. The signature is not required to match the suffix.

    Args:
        file_path: Path to the image file to validate

    Returns:
        bool: True if the file is a valid image file, False otherwise
    """
    # Basic validation.
    try:
        if file_path.stat().st_size == 0:
            logger.debug(f"Empty file: {file_path}")
            return False
    except OSError:
        logger.debug(f"File not found or inaccessible: {file_path}")
        return False

    # Extension validation.
    ext = file_path.suffix.lower()
    if ext not in conf.EXTENSIONS:
        logger.debug(f"Invalid image extension: {ext}, {file_path}")
        return False

    # File header validation.
    try:
        with file_path.open("rb") as f:
            header = f.read(16)  # Long enough for every signature above
    except OSError:
        # Also reached for directories whose name ends in an image suffix.
        logger.debug(f"Cannot read file header: {file_path}")
        return False

    if not _has_image_signature(header):
        logger.debug(f"Invalid image header: {header[:8]}")
        return False

    logger.info(f"Valid image: {file_path}")
    return True
131
+
132
+
133
@dataclass(frozen=True)
class ImageToPDFRunner:
    """Image to PDF converter processor.

    Every valid image located directly in ``root_dir`` becomes one page of a
    single PDF file, in sorted path order. With ``normalize`` enabled each
    image is rotated to portrait, scaled to fit and centred on a white page of
    ``page_size`` pixels.

    The dataclass is frozen; derived values are memoised per instance with
    ``cached_property``.
    """

    root_dir: Path  # Directory containing images to convert
    dpi: int  # Pixels per inch: drives the page size and the PDF resolution
    normalize: bool = True  # Whether to normalize images (rotate, scale, centre)

    def run(self) -> None:
        """Execute the image to PDF conversion process.

        Converts all valid images in the root directory to a single PDF file
        stored at ``output_pdf``. Logs an error and returns without writing
        anything when no image could be converted.
        """
        logger.info(f"Start converting, using dpi={self.dpi}")
        converted_images = self.converted_images
        if not converted_images:
            logger.error(f"No converted image file found in: {self.root_dir}")
            return

        first_page, *other_pages = converted_images
        first_page.save(
            self.output_pdf,
            "PDF",
            # page_size is derived from dpi, so the PDF has to be written with
            # the same resolution for the physical page size to come out right.
            resolution=float(self.dpi),
            save_all=True,
            append_images=other_pages,
            optimize=True,
        )
        logger.info(f"Create pdf file: {self.output_pdf}")

    @cached_property
    def output_pdf(self) -> Path:
        """Get the output PDF file path.

        Returns:
            Path object for the output PDF file, located in the root directory
            with the directory name as the filename. For paths without a
            usable name (``.``, ``..``) the resolved directory name is used,
            and ``output`` when even that is empty (filesystem root).
        """
        stem = self.root_dir.name
        if stem in ("", ".."):
            stem = self.root_dir.resolve().name or "output"
        return self.root_dir / f"{stem}.pdf"

    @cached_property
    def size(self) -> tuple[int, int]:
        """Alias of ``page_size``, kept for backward compatibility.

        Returns:
            Tuple of (width, height) in pixels based on DPI
        """
        return self.page_size

    @cached_property
    def page_size(self) -> tuple[int, int]:
        """Get page size based on DPI setting.

        The page measures 8.27 x 11.69 inches (A4 portrait).

        Returns:
            Tuple of (width, height) in pixels based on DPI
        """
        return (int(8.27 * self.dpi), int(11.69 * self.dpi))

    @cached_property
    def converted_images(self) -> list[Image.Image]:
        """Convert all image files to PIL Image objects.

        Files are submitted to a thread pool in batches of ten, with at most
        four conversions running concurrently. Images that fail to convert are
        skipped.

        Returns:
            List of PIL Image objects in the order of ``image_files``.
        """
        files = self.image_files
        if not files:
            return []

        batch_size = 10
        batch_count = (len(files) - 1) // batch_size + 1
        pages: list[Image.Image] = []

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=min(len(files), 4)
        ) as executor:
            for batch_no, start in enumerate(
                range(0, len(files), batch_size), start=1
            ):
                logger.info(f"Processing batch {batch_no}/{batch_count}")
                futures = [
                    executor.submit(self._convert, file, normalize=self.normalize)
                    for file in files[start : start + batch_size]
                ]
                # Iterating the futures in submission order keeps page order.
                for future in futures:
                    page = future.result()
                    if page is not None:
                        pages.append(page)

        return pages

    @cached_property
    def image_files(self) -> list[Path]:
        """Get list of valid image files in the root directory.

        Returns:
            List of Path objects representing valid image files, sorted by
            path. Empty when the directory holds no valid image.
        """
        all_files = list(self.root_dir.iterdir())
        image_filepath = sorted(file for file in all_files if is_valid_image(file))

        if not image_filepath:
            logger.warning(f"No valid image files found in: {self.root_dir}")
            logger.info(f"Total files in directory: {len(all_files)}")
            return []

        logger.info(
            f"Found {len(image_filepath)} valid image files out of {len(all_files)} total files"
        )
        return image_filepath

    def _convert(
        self,
        filepath: Path,
        normalize: bool = True,
    ) -> Image.Image | None:
        """Load one image file as an RGB page image.

        Args:
            filepath: Path to the image file to convert
            normalize: Whether to normalize the image (rotate, scale, centre
                on a white page). Defaults to True.

        Returns:
            PIL Image object if conversion successful, None if the file could
            not be opened or decoded (the error is logged, not raised).
        """
        try:
            with Image.open(str(filepath)) as img:
                image = self._flatten_to_rgb(img)
        except Exception as e:
            # Worker-thread boundary: one unreadable file must not abort the
            # whole run, so any decoding error is logged and the file skipped.
            logger.error(f"Failed to open image {filepath}: {e}")
            return None

        if not normalize:
            logger.debug(f"Convert image: {filepath} successfully")
            return image

        logger.info(f"Normalizing image: {filepath}")
        image = self._auto_rotate_image(image)
        image = self._auto_scale_image(image)
        # Shrinks images that are still larger than the page; never enlarges.
        image.thumbnail(self.page_size, Resampling.LANCZOS)

        # Centre the fitted image on a white page.
        page_width, page_height = self.page_size
        converted_image = Image.new("RGB", self.page_size, (255, 255, 255))
        converted_image.paste(
            image,
            (
                (page_width - image.size[0]) // 2,
                (page_height - image.size[1]) // 2,
            ),
        )
        logger.info(f"Image normalized: {filepath}")
        logger.debug(f"Convert image: {filepath} successfully")
        return converted_image

    @staticmethod
    def _flatten_to_rgb(img: Image.Image) -> Image.Image:
        """Return an RGB copy of *img* with transparency flattened onto white."""
        if img.mode == "P" and "transparency" in img.info:
            img = img.convert("RGBA")
        if img.mode in ("RGBA", "LA"):
            background = Image.new("RGB", img.size, (255, 255, 255))
            # The last band of RGBA/LA is the alpha channel, used as the mask.
            background.paste(img, mask=img.split()[-1])
            return background
        return img.convert("RGB")

    def _auto_rotate_image(self, image: Image.Image) -> Image.Image:
        """Auto rotate image to portrait orientation.

        If the image width is greater than its height, rotates the image by
        90 degrees counter-clockwise (the direction of ``Image.rotate``) to
        convert from landscape to portrait orientation.

        Args:
            image: PIL Image object to rotate

        Returns:
            PIL Image object in portrait orientation
        """
        width, height = image.size
        if width > height:
            image = image.rotate(90, expand=True)

        return image

    def _auto_scale_image(self, image: Image.Image) -> Image.Image:
        """Auto scale image to fit page dimensions.

        If the image is smaller than the page in at least one dimension, it is
        resized proportionally so that it fits within the page. Images that
        are at least page-sized in both dimensions are returned unchanged.

        Args:
            image: PIL Image object to scale

        Returns:
            Scaled PIL Image object
        """
        width, height = image.size
        page_width, page_height = self.page_size
        if width <= 0 or height <= 0:
            return image

        if width < page_width or height < page_height:
            # min() keeps the result inside the page. max() would scale to
            # *cover* the page and can create enormous intermediate images
            # for very narrow or very wide inputs.
            scale = min(page_width / width, page_height / height)
            new_size = (
                max(1, int(width * scale)),
                max(1, int(height * scale)),
            )
            image = image.resize(new_size, Resampling.LANCZOS)
        return image
388
+
389
+
390
def _positive_int(text: str) -> int:
    """Parse *text* as an integer greater than zero for argparse."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"not an integer: {text!r}") from None
    if value <= 0:
        raise argparse.ArgumentTypeError(f"must be greater than zero: {text!r}")
    return value


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command line arguments for the img2pdf tool.

    Args:
        argv: Argument list to parse. Defaults to None, which makes argparse
            read ``sys.argv[1:]``.

    Returns:
        Namespace object containing parsed arguments:
            - directory: Path to the directory containing images to convert
              (defaults to the current working directory)
            - dpi: DPI setting for the output PDF (defaults to the persisted
              ``conf.DPI``; must be a positive integer)
            - normalize: Whether to normalize images (scale, rotate);
              False unless ``--normalize`` is given
    """
    parser = argparse.ArgumentParser(description="Convert images to PDF.")
    parser.add_argument(
        "directory",
        type=str,
        nargs="?",
        default=str(Path.cwd()),
        help="Image directory",
    )
    parser.add_argument(
        "--dpi",
        type=_positive_int,
        # Use the persisted setting instead of a second hard-coded default.
        default=conf.DPI,
        help="DPI for PDF",
    )
    parser.add_argument(
        "--normalize",
        "-n",
        action="store_true",
        dest="normalize",
        help="Normalize images (scale, rotate)",
    )
    parser.add_argument(
        "--no-normalize",
        action="store_false",
        dest="normalize",
        help="Disable image normalization",
    )
    # Both flags write to the same destination; state the default explicitly
    # rather than relying on which of the two actions was registered first.
    parser.set_defaults(normalize=False)

    return parser.parse_args(argv)
426
+
427
+
428
def main() -> None:
    """Run the img2pdf command line tool.

    Reads the command line options, checks that the requested path is an
    existing directory and, if so, converts its images to a PDF. An invalid
    path is reported through the logger and nothing is converted.
    """
    options = parse_args()
    source_dir = Path(options.directory)

    # Happy path first: a real directory is handed to the runner.
    if source_dir.is_dir():
        ImageToPDFRunner(
            root_dir=source_dir,
            dpi=options.dpi,
            normalize=options.normalize,
        ).run()
        return

    # Otherwise report which of the two checks failed.
    if source_dir.exists():
        logger.error(f"Path is not a directory: {source_dir}")
    else:
        logger.error(f"Directory does not exist: {source_dir}")
@@ -11,8 +11,8 @@ import json
11
11
  import logging
12
12
  import sys
13
13
  from codecs import getincrementaldecoder
14
+ from dataclasses import dataclass
14
15
  from pathlib import Path
15
- from types import SimpleNamespace
16
16
  from typing import ClassVar
17
17
  from urllib.error import URLError
18
18
  from urllib.request import Request, urlopen
@@ -34,14 +34,15 @@ from PySide2.QtWidgets import (
34
34
  QWidget,
35
35
  )
36
36
 
37
- CONFIG_FILE = Path.home() / ".sfi" / "llmclient.json"
37
+ CONFIG_FILE = Path.home() / ".pysfi" / "llmclient.json"
38
38
  logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
39
39
  logger = logging.getLogger(__name__)
40
40
 
41
41
  CONNECTION_TIMEOUT = 5
42
42
 
43
43
 
44
- class LLMClientConfig(SimpleNamespace):
44
+ @dataclass
45
+ class LLMClientConfig:
45
46
  """LLM Chat client configuration."""
46
47
 
47
48
  TITLE: str = "Llama Local Model Tool"
@@ -52,18 +53,23 @@ class LLMClientConfig(SimpleNamespace):
52
53
  TEMPERATURE: float = 0.7
53
54
  TOP_P: float = 0.9
54
55
  TOP_K: int = 40
55
-
56
56
  MAX_TOKENS_RANGE: ClassVar[list[int]] = [1, 4096]
57
57
  TEMPERATURE_RANGE: ClassVar[list[float]] = [0.0, 2.0]
58
58
  TOP_P_RANGE: ClassVar[list[float]] = [0.0, 1.0]
59
59
  TOP_K_RANGE: ClassVar[list[int]] = [1, 100]
60
+ _loaded_from_file: bool = False
60
61
 
61
- def __init__(self) -> None:
62
+ def __post_init__(self) -> None:
62
63
  if CONFIG_FILE.exists():
63
64
  logger.info("Loading configuration from %s", CONFIG_FILE)
64
65
  try:
65
- self.__dict__.update(json.loads(CONFIG_FILE.read_text()))
66
- except (json.JSONDecodeError, TypeError) as e:
66
+ config_data = json.loads(CONFIG_FILE.read_text())
67
+ # Update instance attributes with loaded values
68
+ for key, value in config_data.items():
69
+ if hasattr(self, key):
70
+ setattr(self, key, value)
71
+ self._loaded_from_file = True
72
+ except (json.JSONDecodeError, TypeError, AttributeError) as e:
67
73
  logger.warning("Failed to load configuration: %s", e)
68
74
  logger.info("Using default configuration")
69
75
  else:
@@ -72,7 +78,24 @@ class LLMClientConfig(SimpleNamespace):
72
78
  def save(self) -> None:
73
79
  """Save configuration."""
74
80
  CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
75
- CONFIG_FILE.write_text(json.dumps(vars(self), indent=4))
81
+ # Convert dataclass to dict for JSON serialization
82
+ config_dict = {}
83
+ for attr_name in dir(self):
84
+ if not attr_name.startswith("_") and attr_name not in [
85
+ "WIN_SIZE",
86
+ "WIN_POS",
87
+ "MAX_TOKENS_RANGE",
88
+ "TEMPERATURE_RANGE",
89
+ "TOP_P_RANGE",
90
+ "TOP_K_RANGE",
91
+ ]:
92
+ try:
93
+ attr_value = getattr(self, attr_name)
94
+ if not callable(attr_value):
95
+ config_dict[attr_name] = attr_value
96
+ except AttributeError:
97
+ continue
98
+ CONFIG_FILE.write_text(json.dumps(config_dict, indent=4))
76
99
 
77
100
 
78
101
  conf = LLMClientConfig()
@@ -8,8 +8,8 @@ import logging
8
8
  import pathlib
9
9
  import subprocess
10
10
  import sys
11
+ from dataclasses import dataclass
11
12
  from pathlib import Path
12
- from types import SimpleNamespace
13
13
 
14
14
  from PySide2.QtCore import QThread, Signal, Slot
15
15
  from PySide2.QtGui import QMoveEvent, QResizeEvent
@@ -29,28 +29,46 @@ from PySide2.QtWidgets import (
29
29
  QWidget,
30
30
  )
31
31
 
32
- CONFIG_FILE = Path.home() / ".sfi" / "llmquantize.json"
32
+ CONFIG_FILE = Path.home() / ".pysfi" / "llmquantize.json"
33
33
 
34
34
  logging.basicConfig(level=logging.INFO)
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
 
38
- class QuantizerConfig(SimpleNamespace):
38
+ @dataclass
39
+ class QuantizerConfig:
39
40
  """GGUF量化转换工具配置."""
40
41
 
41
42
  TITLE: str = "GGUF量化转换工具"
42
- WIN_SIZE: list[int] = [600, 500] # noqa: RUF012
43
- WIN_POS: list[int] = [100, 100] # noqa: RUF012
43
+ WIN_SIZE: list[int] = None
44
+ WIN_POS: list[int] = None
44
45
  LAST_INPUT_FILE: str = ""
45
- SELECTED_QUANTS: list[str] = ["Q4_K_M", "Q5_K_M"] # noqa: RUF012
46
+ SELECTED_QUANTS: list[str] = None
47
+ _loaded_from_file: bool = False
48
+
49
+ def __post_init__(self) -> None:
50
+ # 初始化默认值
51
+ if self.WIN_SIZE is None:
52
+ self.WIN_SIZE = [600, 500]
53
+ if self.WIN_POS is None:
54
+ self.WIN_POS = [100, 100]
55
+ if self.SELECTED_QUANTS is None:
56
+ self.SELECTED_QUANTS = ["Q4_K_M", "Q5_K_M"]
46
57
 
47
- def __init__(self) -> None:
48
58
  if CONFIG_FILE.exists():
49
59
  logger.info("Loading configuration from %s", CONFIG_FILE)
50
60
  try:
51
- # 直接更新,忽略无效字段
52
- self.__dict__.update(json.loads(CONFIG_FILE.read_text()))
53
- except (json.JSONDecodeError, TypeError) as e:
61
+ config_data = json.loads(CONFIG_FILE.read_text())
62
+ # 更新实例属性,只更新存在的属性
63
+ for key, value in config_data.items():
64
+ if hasattr(self, key):
65
+ if key in ["WIN_SIZE", "WIN_POS", "SELECTED_QUANTS"]:
66
+ # 对于列表类型,需要特别处理
67
+ setattr(self, key, value)
68
+ else:
69
+ setattr(self, key, value)
70
+ self._loaded_from_file = True
71
+ except (json.JSONDecodeError, TypeError, AttributeError) as e:
54
72
  logger.warning("Failed to load configuration: %s", e)
55
73
  logger.info("Using default configuration")
56
74
  else:
@@ -59,7 +77,17 @@ class QuantizerConfig(SimpleNamespace):
59
77
  def save(self) -> None:
60
78
  """保存配置."""
61
79
  CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
62
- CONFIG_FILE.write_text(json.dumps(vars(self), indent=4))
80
+ # 将数据类转换为字典进行JSON序列化
81
+ config_dict = {}
82
+ for attr_name in dir(self):
83
+ if not attr_name.startswith("_") and attr_name not in ["TITLE"]:
84
+ try:
85
+ attr_value = getattr(self, attr_name)
86
+ if not callable(attr_value):
87
+ config_dict[attr_name] = attr_value
88
+ except AttributeError:
89
+ continue
90
+ CONFIG_FILE.write_text(json.dumps(config_dict, indent=4))
63
91
 
64
92
 
65
93
  conf = QuantizerConfig()
@@ -0,0 +1 @@
1
+