simtoolsz 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/PKG-INFO +1 -1
  2. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/pyproject.toml +1 -1
  3. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/__init__.py +1 -1
  4. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/reader.py +91 -6
  5. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.github/workflows/publish.yml +0 -0
  6. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.gitignore +0 -0
  7. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.python-version +0 -0
  8. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/LICENSE +0 -0
  9. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/README.md +0 -0
  10. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/README_EN.md +0 -0
  11. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/DATETIME_CONVERSION.md +0 -0
  12. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/mail_usage_guide.md +0 -0
  13. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/special2db_usage.md +0 -0
  14. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/conversion_examples.py +0 -0
  15. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/mail_examples.py +0 -0
  16. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/special2db_example.py +0 -0
  17. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/today_examples.py +0 -0
  18. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/zip2db_example.py +0 -0
  19. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/requirements-dev.lock +0 -0
  20. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/requirements.lock +0 -0
  21. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/datetime.py +0 -0
  22. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/db.py +0 -0
  23. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/mail.py +0 -0
  24. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/utils.py +0 -0
  25. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/test_optimized_reader.py +0 -0
  26. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_conversion.py +0 -0
  27. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_iso_comprehensive.py +0 -0
  28. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_iso_format.py +0 -0
  29. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_simple.py +0 -0
  30. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_special2db.py +0 -0
  31. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_special2db_simple.py +0 -0
  32. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_today_optimized.py +0 -0
  33. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_which_format.py +0 -0
  34. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_zip2db.py +0 -0
  35. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_zip2db_simple.py +0 -0
  36. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/verify_unicode_fix.py +0 -0
  37. {simtoolsz-0.2.9 → simtoolsz-0.2.11}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: simtoolsz
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: A simple and convenient toolkit containing useful functions, classes, and methods.
5
5
  Project-URL: Homepage, https://github.com/SidneyLYZhang/simtoolsz
6
6
  Project-URL: Repository, https://github.com/SidneyLYZhang/simtoolsz.git
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "simtoolsz"
3
- version = "0.2.9"
3
+ version = "0.2.11"
4
4
  description = "A simple and convenient toolkit containing useful functions, classes, and methods."
5
5
  keywords = ["tool", "collection"]
6
6
  license = { text = "MulanPSL-2.0" }
@@ -10,7 +10,7 @@ import simtoolsz.reader as reader
10
10
  try:
11
11
  __version__ = importlib.metadata.version("simtoolsz")
12
12
  except importlib.metadata.PackageNotFoundError:
13
- __version__ = "0.2.9"
13
+ __version__ = "0.2.11"
14
14
 
15
15
  __all__ = [
16
16
  '__version__', 'mail', 'utils', 'datetime', 'db', 'reader'
@@ -12,7 +12,7 @@ from tempfile import TemporaryDirectory
12
12
 
13
13
  __all__ = [
14
14
  "read_tsv", "scan_tsv", "getreader", "load_data", "read_archive",
15
- "is_archive_file"
15
+ "is_archive_file", "excel_sheet_names", "load_excel"
16
16
  ]
17
17
 
18
18
  def read_tsv(filepath: Path, lazy: bool = False, **kwargs
@@ -233,6 +233,8 @@ def _is_archive_file(file_path: Path) -> bool:
233
233
  """
234
234
  if not file_path.is_file():
235
235
  return False
236
+ if file_path.suffix in ('.xlsx', '.xls', '.ods'):
237
+ return False
236
238
  return is_zipfile(file_path) or is_tarfile(file_path)
237
239
 
238
240
 
@@ -478,9 +480,92 @@ def load_data(
478
480
  if transtype is not None:
479
481
  if isinstance(transtype, pl.Expr):
480
482
  df = df.with_columns(transtype)
481
- elif isinstance(transtype, list):
483
+ if isinstance(transtype, list):
482
484
  df = df.with_columns(*transtype)
483
- else:
484
- raise ValueError("transtype must be a polars expression or a list of polars expressions")
485
-
486
- return df
485
+ return df
486
+
487
+ def excel_sheet_names(
488
+ file_path: Path | str
489
+ ) -> list[str]:
490
+ """
491
+ Get the names of all sheets in an Excel file.
492
+
493
+ Args:
494
+ file_path: Path to the Excel file
495
+
496
+ Returns:
497
+ list[str]: Names of all sheets in the Excel file
498
+ """
499
+ with ZipFile(file_path, 'r') as zf:
500
+ xml_content = zf.read("xl/workbook.xml").decode('utf-8')
501
+ sheet_names = re.findall(r'<sheet name="([^"]+)"', xml_content)
502
+ return sheet_names
503
+
504
+ def _get_excel_samecolumns_sheet(
505
+ file_path: Path | str
506
+ ) -> list[list[str]] :
507
+ """
508
+ Get the names of all sheets in an Excel file that have the same columns.
509
+
510
+ Args:
511
+ file_path: Path to the Excel file
512
+
513
+ Returns:
514
+ list[list[str]]: Names of all sheets in the Excel file that have the same columns.
515
+ """
516
+ sheet_names = excel_sheet_names(file_path)
517
+ if len(sheet_names) == 0:
518
+ raise ValueError("Excel file has no sheets")
519
+ if len(sheet_names) == 1:
520
+ return sheet_names
521
+ sheet_cols = {
522
+ sn: pl.read_excel(file_path, sheet_name=sn).columns
523
+ for sn in sheet_names
524
+ }
525
+ grouped = {}
526
+ for k, v in sheet_cols.items():
527
+ grouped.setdefault(v, []).append(k)
528
+ return list(grouped.values())
529
+
530
+
531
+ def load_excel(
532
+ file_path: Path | str,
533
+ sheet_name: str = "Sheet1",
534
+ **kwargs
535
+ ) -> pl.DataFrame:
536
+ """
537
+ Load data from an Excel file.
538
+
539
+ Args:
540
+ file_path: Path to the Excel file
541
+ sheet_name: Name of the sheet to load (default: "Sheet1"),
542
+ special value "@all" load all sheets,"@most" load sheets with most columns
543
+ **kwargs: Additional arguments to pass to polars.read_excel
544
+
545
+ Returns:
546
+ pl.DataFrame: Loaded data
547
+ """
548
+ sheet_names = excel_sheet_names(file_path)
549
+ sheet_parts = _get_excel_samecolumns_sheet(file_path)
550
+ if sheet_name.lower() == "@all" :
551
+ if len(sheet_parts) != 1 :
552
+ raise ValueError("Excel file has multiple sheets with different columns")
553
+ df = pl.concat([
554
+ pl.read_excel(file_path, sheet_name=sn, **kwargs)
555
+ for sn in sheet_names
556
+ ])
557
+ return df
558
+ elif sheet_name.lower() == "@most" :
559
+ sheet_name = []
560
+ for i in sheet_parts:
561
+ if len(i) > len(sheet_name):
562
+ sheet_name = i
563
+ df = pl.concat([
564
+ pl.read_excel(file_path, sheet_name=sn, **kwargs)
565
+ for sn in sheet_name
566
+ ])
567
+ elif sheet_name in sheet_names:
568
+ df = pl.read_excel(file_path, sheet_name=sheet_name, **kwargs)
569
+ else:
570
+ raise ValueError(f"Sheet {sheet_name} not found in Excel file")
571
+ return df
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes