simtoolsz 0.2.9__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/PKG-INFO +1 -1
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/pyproject.toml +1 -1
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/__init__.py +1 -1
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/reader.py +91 -6
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.github/workflows/publish.yml +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.gitignore +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/.python-version +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/LICENSE +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/README.md +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/README_EN.md +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/DATETIME_CONVERSION.md +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/mail_usage_guide.md +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/docs/special2db_usage.md +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/conversion_examples.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/mail_examples.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/special2db_example.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/today_examples.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/examples/zip2db_example.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/requirements-dev.lock +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/requirements.lock +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/datetime.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/db.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/mail.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/src/simtoolsz/utils.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/test_optimized_reader.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_conversion.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_iso_comprehensive.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_iso_format.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_simple.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_special2db.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_special2db_simple.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_today_optimized.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_which_format.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_zip2db.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/test_zip2db_simple.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/tests/verify_unicode_fix.py +0 -0
- {simtoolsz-0.2.9 → simtoolsz-0.2.11}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: simtoolsz
|
|
3
|
-
Version: 0.2.9
|
|
3
|
+
Version: 0.2.11
|
|
4
4
|
Summary: A simple and convenient toolkit containing useful functions, classes, and methods.
|
|
5
5
|
Project-URL: Homepage, https://github.com/SidneyLYZhang/simtoolsz
|
|
6
6
|
Project-URL: Repository, https://github.com/SidneyLYZhang/simtoolsz.git
|
|
@@ -10,7 +10,7 @@ import simtoolsz.reader as reader
|
|
|
10
10
|
try:
|
|
11
11
|
__version__ = importlib.metadata.version("simtoolsz")
|
|
12
12
|
except importlib.metadata.PackageNotFoundError:
|
|
13
|
-
__version__ = "0.2.9"
|
|
13
|
+
__version__ = "0.2.11"
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
'__version__', 'mail', 'utils', 'datetime', 'db', 'reader'
|
|
@@ -12,7 +12,7 @@ from tempfile import TemporaryDirectory
|
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
14
|
"read_tsv", "scan_tsv", "getreader", "load_data", "read_archive",
|
|
15
|
-
"is_archive_file"
|
|
15
|
+
"is_archive_file", "excel_sheet_names", "load_excel"
|
|
16
16
|
]
|
|
17
17
|
|
|
18
18
|
def read_tsv(filepath: Path, lazy: bool = False, **kwargs
|
|
@@ -233,6 +233,8 @@ def _is_archive_file(file_path: Path) -> bool:
|
|
|
233
233
|
"""
|
|
234
234
|
if not file_path.is_file():
|
|
235
235
|
return False
|
|
236
|
+
if file_path.suffix in ('.xlsx', '.xls', '.ods'):
|
|
237
|
+
return False
|
|
236
238
|
return is_zipfile(file_path) or is_tarfile(file_path)
|
|
237
239
|
|
|
238
240
|
|
|
@@ -478,9 +480,92 @@ def load_data(
|
|
|
478
480
|
if transtype is not None:
|
|
479
481
|
if isinstance(transtype, pl.Expr):
|
|
480
482
|
df = df.with_columns(transtype)
|
|
481
|
-
|
|
483
|
+
if isinstance(transtype, list):
|
|
482
484
|
df = df.with_columns(*transtype)
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
485
|
+
return df
|
|
486
|
+
|
|
487
|
+
def excel_sheet_names(
|
|
488
|
+
file_path: Path | str
|
|
489
|
+
) -> list[str]:
|
|
490
|
+
"""
|
|
491
|
+
Get the names of all sheets in an Excel file.
|
|
492
|
+
|
|
493
|
+
Args:
|
|
494
|
+
file_path: Path to the Excel file
|
|
495
|
+
|
|
496
|
+
Returns:
|
|
497
|
+
list[str]: Names of all sheets in the Excel file
|
|
498
|
+
"""
|
|
499
|
+
with ZipFile(file_path, 'r') as zf:
|
|
500
|
+
xml_content = zf.read("xl/workbook.xml").decode('utf-8')
|
|
501
|
+
sheet_names = re.findall(r'<sheet name="([^"]+)"', xml_content)
|
|
502
|
+
return sheet_names
|
|
503
|
+
|
|
504
|
+
def _get_excel_samecolumns_sheet(
|
|
505
|
+
file_path: Path | str
|
|
506
|
+
) -> list[list[str]] :
|
|
507
|
+
"""
|
|
508
|
+
Get the names of all sheets in an Excel file that have the same columns.
|
|
509
|
+
|
|
510
|
+
Args:
|
|
511
|
+
file_path: Path to the Excel file
|
|
512
|
+
|
|
513
|
+
Returns:
|
|
514
|
+
list[list[str]]: Names of all sheets in the Excel file that have the same columns.
|
|
515
|
+
"""
|
|
516
|
+
sheet_names = excel_sheet_names(file_path)
|
|
517
|
+
if len(sheet_names) == 0:
|
|
518
|
+
raise ValueError("Excel file has no sheets")
|
|
519
|
+
if len(sheet_names) == 1:
|
|
520
|
+
return sheet_names
|
|
521
|
+
sheet_cols = {
|
|
522
|
+
sn: pl.read_excel(file_path, sheet_name=sn).columns
|
|
523
|
+
for sn in sheet_names
|
|
524
|
+
}
|
|
525
|
+
grouped = {}
|
|
526
|
+
for k, v in sheet_cols.items():
|
|
527
|
+
grouped.setdefault(v, []).append(k)
|
|
528
|
+
return list(grouped.values())
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def load_excel(
|
|
532
|
+
file_path: Path | str,
|
|
533
|
+
sheet_name: str = "Sheet1",
|
|
534
|
+
**kwargs
|
|
535
|
+
) -> pl.DataFrame:
|
|
536
|
+
"""
|
|
537
|
+
Load data from an Excel file.
|
|
538
|
+
|
|
539
|
+
Args:
|
|
540
|
+
file_path: Path to the Excel file
|
|
541
|
+
sheet_name: Name of the sheet to load (default: "Sheet1"),
|
|
542
|
+
special value "@all" load all sheets,"@most" load sheets with most columns
|
|
543
|
+
**kwargs: Additional arguments to pass to polars.read_excel
|
|
544
|
+
|
|
545
|
+
Returns:
|
|
546
|
+
pl.DataFrame: Loaded data
|
|
547
|
+
"""
|
|
548
|
+
sheet_names = excel_sheet_names(file_path)
|
|
549
|
+
sheet_parts = _get_excel_samecolumns_sheet(file_path)
|
|
550
|
+
if sheet_name.lower() == "@all" :
|
|
551
|
+
if len(sheet_parts) != 1 :
|
|
552
|
+
raise ValueError("Excel file has multiple sheets with different columns")
|
|
553
|
+
df = pl.concat([
|
|
554
|
+
pl.read_excel(file_path, sheet_name=sn, **kwargs)
|
|
555
|
+
for sn in sheet_names
|
|
556
|
+
])
|
|
557
|
+
return df
|
|
558
|
+
elif sheet_name.lower() == "@most" :
|
|
559
|
+
sheet_name = []
|
|
560
|
+
for i in sheet_parts:
|
|
561
|
+
if len(i) > len(sheet_name):
|
|
562
|
+
sheet_name = i
|
|
563
|
+
df = pl.concat([
|
|
564
|
+
pl.read_excel(file_path, sheet_name=sn, **kwargs)
|
|
565
|
+
for sn in sheet_name
|
|
566
|
+
])
|
|
567
|
+
elif sheet_name in sheet_names:
|
|
568
|
+
df = pl.read_excel(file_path, sheet_name=sheet_name, **kwargs)
|
|
569
|
+
else:
|
|
570
|
+
raise ValueError(f"Sheet {sheet_name} not found in Excel file")
|
|
571
|
+
return df
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|