pathpilot 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pathpilot-0.2.0 → pathpilot-0.2.2}/PKG-INFO +4 -4
- {pathpilot-0.2.0 → pathpilot-0.2.2}/pathpilot/__init__.py +1 -1
- pathpilot-0.2.2/pathpilot/_file/__init__.py +10 -0
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_csv.py +1 -1
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_excel.py +103 -62
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_pickle.py +1 -1
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_sqlite.py +1 -1
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_text.py +1 -1
- {pathpilot-0.2.0/pathpilot → pathpilot-0.2.2/pathpilot/_file}/_zip.py +3 -3
- pathpilot-0.2.0/pathpilot/_file.py → pathpilot-0.2.2/pathpilot/_file/base.py +11 -18
- pathpilot-0.2.2/pathpilot/_file/utils.py +15 -0
- pathpilot-0.2.2/pathpilot/_folder/__init__.py +7 -0
- pathpilot-0.2.2/pathpilot/_folder/backup.py +90 -0
- pathpilot-0.2.0/pathpilot/_folder.py → pathpilot-0.2.2/pathpilot/_folder/base.py +13 -17
- pathpilot-0.2.2/pathpilot/_folder/utils.py +30 -0
- {pathpilot-0.2.0 → pathpilot-0.2.2}/pathpilot/core.py +7 -9
- pathpilot-0.2.2/pathpilot/utils.py +71 -0
- {pathpilot-0.2.0 → pathpilot-0.2.2}/pyproject.toml +4 -4
- pathpilot-0.2.0/pathpilot/utils.py +0 -199
- {pathpilot-0.2.0 → pathpilot-0.2.2}/LICENSE +0 -0
- {pathpilot-0.2.0 → pathpilot-0.2.2}/README.md +0 -0
- {pathpilot-0.2.0 → pathpilot-0.2.2}/pathpilot/decorators.py +0 -0
- {pathpilot-0.2.0 → pathpilot-0.2.2}/pathpilot/exceptions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pathpilot
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Library that facilitates file and folder manipulation in Python.
|
|
5
5
|
Home-page: https://github.com/zteinck/pathpilot
|
|
6
6
|
License: MIT
|
|
@@ -15,10 +15,10 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Requires-Dist: XlsxWriter
|
|
18
|
-
Requires-Dist: cachegrab
|
|
19
|
-
Requires-Dist: clockwork
|
|
18
|
+
Requires-Dist: cachegrab (>=0.2.2)
|
|
19
|
+
Requires-Dist: clockwork (>=0.2.2)
|
|
20
20
|
Requires-Dist: numpy
|
|
21
|
-
Requires-Dist: oddments
|
|
21
|
+
Requires-Dist: oddments (>=0.2.0)
|
|
22
22
|
Requires-Dist: pandas
|
|
23
23
|
Project-URL: Repository, https://github.com/zteinck/pathpilot
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from collections import OrderedDict
|
|
2
2
|
from functools import cached_property
|
|
3
3
|
from copy import deepcopy
|
|
4
|
+
import datetime
|
|
4
5
|
import sys
|
|
5
6
|
import re
|
|
6
7
|
import pandas as pd
|
|
@@ -8,7 +9,7 @@ import numpy as np
|
|
|
8
9
|
import oddments as odd
|
|
9
10
|
from cachegrab import sha256
|
|
10
11
|
|
|
11
|
-
from .
|
|
12
|
+
from .base import FileBase
|
|
12
13
|
from .utils import get_size_label
|
|
13
14
|
|
|
14
15
|
|
|
@@ -68,13 +69,13 @@ class ExcelFile(FileBase):
|
|
|
68
69
|
#╰-------------------------------------------------------------------------╯
|
|
69
70
|
|
|
70
71
|
def __init__(
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
72
|
+
self,
|
|
73
|
+
f,
|
|
74
|
+
number_tabs=False,
|
|
75
|
+
verbose=True,
|
|
76
|
+
troubleshoot=False,
|
|
77
|
+
**kwargs
|
|
78
|
+
):
|
|
78
79
|
super().__init__(f, **kwargs)
|
|
79
80
|
self.format_cache = dict()
|
|
80
81
|
self.sheet_cache = dict()
|
|
@@ -511,7 +512,7 @@ class ExcelFile(FileBase):
|
|
|
511
512
|
of the list/tuple is shorter than the number
|
|
512
513
|
of columns then no format is applied to the
|
|
513
514
|
remaining columns.
|
|
514
|
-
• None ➜ no formatting is applied
|
|
515
|
+
• None ➜ no formatting is applied.
|
|
515
516
|
inverse : bool
|
|
516
517
|
If True, the 2D data is inverted such that each sub-list is treated
|
|
517
518
|
as column data as opposed to row data under the default behavior.
|
|
@@ -555,10 +556,14 @@ class ExcelFile(FileBase):
|
|
|
555
556
|
updates = dict()
|
|
556
557
|
|
|
557
558
|
if outer_border:
|
|
558
|
-
if col == 0:
|
|
559
|
-
|
|
560
|
-
if
|
|
561
|
-
|
|
559
|
+
if col == 0:
|
|
560
|
+
updates['left'] = 1
|
|
561
|
+
if col == n_cols - 1:
|
|
562
|
+
updates['right'] = 1
|
|
563
|
+
if row == 0:
|
|
564
|
+
updates['top'] = 1
|
|
565
|
+
if row == n_rows - 1:
|
|
566
|
+
updates['bottom'] = 1
|
|
562
567
|
fmt.update(updates)
|
|
563
568
|
|
|
564
569
|
return self.get_format(fmt)
|
|
@@ -630,6 +635,7 @@ class ExcelFile(FileBase):
|
|
|
630
635
|
header_format='pandas_header',
|
|
631
636
|
data_format='auto',
|
|
632
637
|
column_widths='auto',
|
|
638
|
+
date_format=None,
|
|
633
639
|
normalize=True,
|
|
634
640
|
autofilter=False,
|
|
635
641
|
raise_on_empty=True,
|
|
@@ -642,10 +648,10 @@ class ExcelFile(FileBase):
|
|
|
642
648
|
'''
|
|
643
649
|
Description
|
|
644
650
|
------------
|
|
645
|
-
Writes a DataFrame to an Excel worksheet. This
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
651
|
+
Writes a DataFrame to an Excel worksheet. This is an alternative to df.to_excel()
|
|
652
|
+
that addresses some of its limitations such as not being able to format cells that
|
|
653
|
+
already have a format including the index, headers, and cells that contain dates
|
|
654
|
+
or datetimes.
|
|
649
655
|
|
|
650
656
|
Parameters
|
|
651
657
|
------------
|
|
@@ -684,6 +690,14 @@ class ExcelFile(FileBase):
|
|
|
684
690
|
• dict ➜ dictionary where keys are DataFrame column names and values
|
|
685
691
|
are column widths. Any column names excluded from the
|
|
686
692
|
dictionary will not have their widths set.
|
|
693
|
+
date_format : None | str | dict
|
|
694
|
+
Defines how date-like columns are parsed when data_format='auto'
|
|
695
|
+
(e.g. '%Y-%m-%d'). Options include:
|
|
696
|
+
• None ➜ format is inferred.
|
|
697
|
+
• str ➜ used for all date-like columns.
|
|
698
|
+
• dict ➜ dictionary where keys are DataFrame column names and values
|
|
699
|
+
are formats. Any column names excluded from the dictionary
|
|
700
|
+
default to None.
|
|
687
701
|
normalize : bool
|
|
688
702
|
if True, any date columns where the hours, minutes, seconds,
|
|
689
703
|
microseconds are all set to zero (midnight) will be converted
|
|
@@ -714,35 +728,33 @@ class ExcelFile(FileBase):
|
|
|
714
728
|
None
|
|
715
729
|
'''
|
|
716
730
|
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
731
|
+
df = odd.coerce_dataframe(df)
|
|
732
|
+
|
|
733
|
+
odd.validate_value(
|
|
734
|
+
value=date_format,
|
|
735
|
+
attr='date_format',
|
|
736
|
+
types=(str, dict),
|
|
737
|
+
none_ok=True
|
|
738
|
+
)
|
|
724
739
|
|
|
725
740
|
# kwargs housekeeping
|
|
726
|
-
if kwargs.get('inverse'):
|
|
741
|
+
if kwargs.get('inverse'):
|
|
742
|
+
raise NotImplementedError
|
|
727
743
|
|
|
728
744
|
# Reset index
|
|
729
|
-
if
|
|
745
|
+
if odd.get_index_names(df):
|
|
746
|
+
df.reset_index(inplace=True)
|
|
730
747
|
|
|
731
748
|
# Check if empty
|
|
732
749
|
if df.empty:
|
|
733
750
|
if raise_on_empty:
|
|
734
751
|
raise ValueError("'df' argument cannot be empty.")
|
|
735
|
-
if
|
|
752
|
+
if len(df.columns) == 0:
|
|
736
753
|
raise ValueError("'df' argument must have an index or columns")
|
|
737
754
|
total_row, total_column = False, False
|
|
738
755
|
|
|
739
756
|
# Check for duplicate column names
|
|
740
|
-
|
|
741
|
-
dupes = s[ s > 1 ].to_frame()
|
|
742
|
-
if len(dupes) > 0:
|
|
743
|
-
raise ValueError(
|
|
744
|
-
f"'df' argument cannot have duplicate column names: \n\n{dupes}\n"
|
|
745
|
-
)
|
|
757
|
+
odd.verify_no_duplicates(df=df, attr='columns')
|
|
746
758
|
|
|
747
759
|
# Add a total column to dataframe
|
|
748
760
|
if total_column:
|
|
@@ -763,29 +775,16 @@ class ExcelFile(FileBase):
|
|
|
763
775
|
|
|
764
776
|
numeric_columns -= percent_columns
|
|
765
777
|
|
|
766
|
-
datelike_columns = set(
|
|
778
|
+
datelike_columns = set(
|
|
779
|
+
df.select_dtypes(include=[np.datetime64]).columns.tolist()
|
|
780
|
+
)
|
|
767
781
|
|
|
768
782
|
for k in df.columns:
|
|
769
783
|
if isinstance(k, str) and \
|
|
770
|
-
|
|
771
|
-
|
|
784
|
+
odd.column_name_is_datelike(k) and \
|
|
785
|
+
str(df[k].dtype) != 'timedelta64[ns]':
|
|
772
786
|
datelike_columns.add(k)
|
|
773
787
|
|
|
774
|
-
date_columns, datetime_columns = [], []
|
|
775
|
-
for k in list(datelike_columns):
|
|
776
|
-
if not np.issubdtype(df[k].dtype, np.datetime64):
|
|
777
|
-
try:
|
|
778
|
-
df[k] = pd.to_datetime(df[k])
|
|
779
|
-
except:
|
|
780
|
-
datelike_columns.remove(k)
|
|
781
|
-
|
|
782
|
-
for k in datelike_columns:
|
|
783
|
-
if normalize and (df[k].dropna() == df[k].dropna().dt.normalize()).all():
|
|
784
|
-
df[k] = df[k].dt.date
|
|
785
|
-
date_columns.append(k)
|
|
786
|
-
else:
|
|
787
|
-
datetime_columns.append(k)
|
|
788
|
-
|
|
789
788
|
# Parse start cell
|
|
790
789
|
start_col, start_row = self.parse_start_cell(start_cell)
|
|
791
790
|
|
|
@@ -801,20 +800,25 @@ class ExcelFile(FileBase):
|
|
|
801
800
|
# Force data_format to comply with the standard {column name : format}
|
|
802
801
|
if isinstance(data_format, dict):
|
|
803
802
|
if not all(k in df.columns for k in data_format):
|
|
804
|
-
if any(isinstance(v, (list, tuple, dict))
|
|
803
|
+
if any(isinstance(v, (list, tuple, dict))
|
|
804
|
+
for v in data_format.values()):
|
|
805
805
|
raise ValueError
|
|
806
806
|
data_format = {k: data_format for k in df.columns}
|
|
807
807
|
|
|
808
|
-
#
|
|
808
|
+
# Auto-detects the best format for each DataFrame column
|
|
809
809
|
if data_format == 'auto':
|
|
810
810
|
data_format = dict()
|
|
811
811
|
|
|
812
812
|
# cascade auto formatting
|
|
813
|
-
if total_row_format is None:
|
|
814
|
-
|
|
813
|
+
if total_row_format is None:
|
|
814
|
+
total_row_format = 'auto'
|
|
815
|
+
|
|
816
|
+
if total_column_format is None:
|
|
817
|
+
total_column_format = 'auto'
|
|
815
818
|
|
|
816
819
|
infer_format = lambda fmt, s: \
|
|
817
|
-
fmt if s.sum() - s.round().sum() == 0
|
|
820
|
+
fmt if s.sum() - s.round().sum() == 0 \
|
|
821
|
+
else f'{fmt}_two_decimals'
|
|
818
822
|
|
|
819
823
|
for k in numeric_columns:
|
|
820
824
|
if not df[k].isna().all():
|
|
@@ -831,14 +835,51 @@ class ExcelFile(FileBase):
|
|
|
831
835
|
s *= 100
|
|
832
836
|
data_format[k] = infer_format('percent', s)
|
|
833
837
|
|
|
834
|
-
|
|
835
|
-
|
|
838
|
+
if isinstance(date_format, dict):
|
|
839
|
+
date_formats = {}
|
|
840
|
+
for k in datelike_columns:
|
|
841
|
+
v = date_format.get(k)
|
|
842
|
+
if v is not None:
|
|
843
|
+
odd.validate_value(value=v, types=str)
|
|
844
|
+
date_formats[k] = v
|
|
845
|
+
else:
|
|
846
|
+
date_formats = {k: date_format for k in datelike_columns}
|
|
847
|
+
|
|
848
|
+
datetime_columns = set()
|
|
849
|
+
|
|
850
|
+
for k in list(datelike_columns):
|
|
851
|
+
if np.issubdtype(df[k].dtype, np.datetime64): continue
|
|
852
|
+
fmt = date_formats.get(k)
|
|
853
|
+
try:
|
|
854
|
+
df[k] = pd.to_datetime(df[k], format=fmt)
|
|
855
|
+
except:
|
|
856
|
+
if fmt is None:
|
|
857
|
+
datelike_columns.remove(k)
|
|
858
|
+
else:
|
|
859
|
+
df[k] = df[k].apply(odd.ignore_nan(lambda x: \
|
|
860
|
+
datetime.datetime.strptime(x, fmt)
|
|
861
|
+
))
|
|
862
|
+
datetime_columns.add(k)
|
|
863
|
+
|
|
864
|
+
for k in datelike_columns:
|
|
865
|
+
data_format[k] = 'datetime'
|
|
866
|
+
if not normalize: continue
|
|
867
|
+
s = df[k].dropna()
|
|
868
|
+
if s.empty: continue
|
|
869
|
+
if k in datetime_columns:
|
|
870
|
+
if all(x.time() == datetime.time(0, 0) for x in s.values):
|
|
871
|
+
df[k] = df[k].apply(odd.ignore_nan(lambda x: x.date()))
|
|
872
|
+
data_format[k] = 'date'
|
|
873
|
+
else:
|
|
874
|
+
if (s == s.dt.normalize()).all():
|
|
875
|
+
df[k] = df[k].dt.date
|
|
876
|
+
data_format[k] = 'date'
|
|
836
877
|
|
|
837
878
|
if isinstance(data_format, str):
|
|
838
879
|
data_format = {k: data_format for k in df.columns}
|
|
839
880
|
|
|
840
881
|
if isinstance(data_format, (list, tuple)):
|
|
841
|
-
data_format = {k: v for k,v in zip(df.columns, data_format)}
|
|
882
|
+
data_format = {k: v for k, v in zip(df.columns, data_format)}
|
|
842
883
|
|
|
843
884
|
if data_format is not None and not isinstance(data_format, dict):
|
|
844
885
|
raise TypeError(
|
|
@@ -882,8 +923,8 @@ class ExcelFile(FileBase):
|
|
|
882
923
|
start_cell=(start_col + 1, start_row + 2),
|
|
883
924
|
data=df.replace([np.inf, -np.inf], np.nan)\
|
|
884
925
|
.where(df.notnull(), None).values.tolist(),
|
|
885
|
-
formatting=
|
|
886
|
-
|
|
926
|
+
formatting=None if data_format is None else \
|
|
927
|
+
[data_format.get(k) for k in df.columns],
|
|
887
928
|
sheet=sheet,
|
|
888
929
|
**kwargs
|
|
889
930
|
)
|
|
@@ -1023,7 +1064,7 @@ class ExcelFile(FileBase):
|
|
|
1023
1064
|
OrderedDict((i, k) for i, k in enumerate(sorted(list(set(x)))))
|
|
1024
1065
|
|
|
1025
1066
|
counter = build_counter(rows) if down else build_counter(cols)
|
|
1026
|
-
inverse_counter = {v: k for k,v in counter.items()}
|
|
1067
|
+
inverse_counter = {v: k for k, v in counter.items()}
|
|
1027
1068
|
|
|
1028
1069
|
for c, r in zip(cols, rows):
|
|
1029
1070
|
x, y = self.get_column_letter(c - 1), str(r)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import zipfile
|
|
2
2
|
import shutil
|
|
3
3
|
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
from .
|
|
4
|
+
from .._folder import Folder
|
|
5
|
+
from ..utils import is_file, is_folder
|
|
6
|
+
from .base import FileBase
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ZipFile(FileBase):
|
|
@@ -5,28 +5,21 @@ import os
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import numpy as np
|
|
7
7
|
import oddments as odd
|
|
8
|
+
import clockwork as cw
|
|
8
9
|
from cachegrab import sha256
|
|
9
10
|
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
day_of_week,
|
|
14
|
-
year_end,
|
|
15
|
-
Date,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
from .decorators import check_read_only
|
|
19
|
-
from .exceptions import ReadOnlyError
|
|
11
|
+
from .._folder import Folder
|
|
12
|
+
from ..decorators import check_read_only
|
|
13
|
+
from ..exceptions import ReadOnlyError
|
|
20
14
|
|
|
21
|
-
from
|
|
22
|
-
get_size_label,
|
|
15
|
+
from ..utils import (
|
|
23
16
|
trifurcate,
|
|
24
17
|
is_file,
|
|
25
18
|
get_created_date,
|
|
26
19
|
get_modified_date,
|
|
27
20
|
)
|
|
28
21
|
|
|
29
|
-
from .
|
|
22
|
+
from .utils import get_size_label
|
|
30
23
|
|
|
31
24
|
|
|
32
25
|
class FileBase(object):
|
|
@@ -513,7 +506,7 @@ class FileBase(object):
|
|
|
513
506
|
# helper functions adding timestamps to files
|
|
514
507
|
@Decorators.add_timestamp
|
|
515
508
|
def quarter(self, delta=0):
|
|
516
|
-
return quarter_end(delta=delta).label
|
|
509
|
+
return cw.quarter_end(delta=delta).label
|
|
517
510
|
|
|
518
511
|
|
|
519
512
|
def qtr(self, *args, **kwargs):
|
|
@@ -522,21 +515,21 @@ class FileBase(object):
|
|
|
522
515
|
|
|
523
516
|
@Decorators.add_timestamp
|
|
524
517
|
def month(self, delta=0):
|
|
525
|
-
return month_end(delta=delta).ymd
|
|
518
|
+
return cw.month_end(delta=delta).ymd
|
|
526
519
|
|
|
527
520
|
|
|
528
521
|
@Decorators.add_timestamp
|
|
529
522
|
def day(self, weekday, delta=0):
|
|
530
|
-
return day_of_week(weekday=weekday, delta=delta).ymd
|
|
523
|
+
return cw.day_of_week(weekday=weekday, delta=delta).ymd
|
|
531
524
|
|
|
532
525
|
|
|
533
526
|
@Decorators.add_timestamp
|
|
534
527
|
def year(self, delta=0):
|
|
535
|
-
return str(year_end(delta=delta).year)
|
|
528
|
+
return str(cw.year_end(delta=delta).year)
|
|
536
529
|
|
|
537
530
|
|
|
538
531
|
@Decorators.add_timestamp
|
|
539
532
|
def timestamp(self, normalize=False, week_offset=0, fmt=None):
|
|
540
|
-
now = Date(normalize=normalize, week_offset=week_offset)
|
|
533
|
+
now = cw.Date(normalize=normalize, week_offset=week_offset)
|
|
541
534
|
now = str(now).replace(':','.') if fmt is None else now.str(fmt)
|
|
542
535
|
return now
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import math
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_size_label(size_in_bytes, decimal_places=2):
|
|
5
|
+
|
|
6
|
+
units = ('','K','M','G','T','P','E','Z','Y')
|
|
7
|
+
conversion_factor = 1024
|
|
8
|
+
|
|
9
|
+
if size_in_bytes == 0:
|
|
10
|
+
index, size = 0, 0
|
|
11
|
+
else:
|
|
12
|
+
index = int(math.floor(math.log(size_in_bytes, conversion_factor)))
|
|
13
|
+
size = size_in_bytes / math.pow(conversion_factor, index)
|
|
14
|
+
|
|
15
|
+
return f'{size:,.{decimal_places}f} {units[index]}B'
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import filecmp
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def backup_folder(
|
|
7
|
+
origin,
|
|
8
|
+
destination,
|
|
9
|
+
overwrite=True,
|
|
10
|
+
shallow=True,
|
|
11
|
+
verbose=True
|
|
12
|
+
):
|
|
13
|
+
'''
|
|
14
|
+
Description
|
|
15
|
+
------------
|
|
16
|
+
Backs up the folders and files in the 'origin' folder to the
|
|
17
|
+
'destination' folder. Files in 'destination' are overwritten if they are
|
|
18
|
+
different than files of the same name in 'origin' according to filecmp.cmp
|
|
19
|
+
(e.g. doc.xlsx exists in both directories but the version in 'origin' was
|
|
20
|
+
updated since the last time a backup was performed.)
|
|
21
|
+
|
|
22
|
+
Parameters
|
|
23
|
+
------------
|
|
24
|
+
origin : str | Folder
|
|
25
|
+
folder to backup
|
|
26
|
+
destination : str | Folder
|
|
27
|
+
backup folder
|
|
28
|
+
overwrite : bool
|
|
29
|
+
if True, if the destination file already exists and it is different than
|
|
30
|
+
the origin file, it will be overwritten.
|
|
31
|
+
If False, overlapping files are ignored.
|
|
32
|
+
shallow : bool
|
|
33
|
+
filecmp.cmp(f1, f2, shallow=True) shallow argument.
|
|
34
|
+
"If shallow is true and the os.stat() signatures (file type, size, and
|
|
35
|
+
modification time) of both files are identical, the files are taken to be
|
|
36
|
+
equal."
|
|
37
|
+
https://docs.python.org/3/library/filecmp.html
|
|
38
|
+
verbose : bool
|
|
39
|
+
if True, all folders and files that were backed up or overwritten are
|
|
40
|
+
printed.
|
|
41
|
+
|
|
42
|
+
Returns
|
|
43
|
+
------------
|
|
44
|
+
None
|
|
45
|
+
'''
|
|
46
|
+
|
|
47
|
+
def format_path(path):
|
|
48
|
+
path = str(path).replace('\\','/')
|
|
49
|
+
if path[-1] != '/': path = path + '/'
|
|
50
|
+
return path
|
|
51
|
+
|
|
52
|
+
origin = format_path(origin)
|
|
53
|
+
destination = format_path(destination)
|
|
54
|
+
if not os.path.exists(destination): os.mkdir(destination)
|
|
55
|
+
|
|
56
|
+
for path, folders, files in os.walk(origin):
|
|
57
|
+
from_path = format_path(path)
|
|
58
|
+
to_path = from_path.replace(origin, destination)
|
|
59
|
+
|
|
60
|
+
for file in files:
|
|
61
|
+
copy_file = False
|
|
62
|
+
from_file = from_path + file
|
|
63
|
+
to_file = to_path + file
|
|
64
|
+
text = to_file.replace(destination, '/')
|
|
65
|
+
|
|
66
|
+
if os.path.exists(to_file):
|
|
67
|
+
if not filecmp.cmp(
|
|
68
|
+
from_file,
|
|
69
|
+
to_file,
|
|
70
|
+
shallow=shallow
|
|
71
|
+
) and overwrite:
|
|
72
|
+
action = 'Overwrite'
|
|
73
|
+
copy_file = True
|
|
74
|
+
else:
|
|
75
|
+
action = 'BackingUp'
|
|
76
|
+
copy_file = True
|
|
77
|
+
|
|
78
|
+
if copy_file:
|
|
79
|
+
try:
|
|
80
|
+
shutil.copyfile(from_file, to_file)
|
|
81
|
+
if verbose: print(f'{action}: {text}')
|
|
82
|
+
except Exception as e:
|
|
83
|
+
if verbose: print(e)
|
|
84
|
+
|
|
85
|
+
for folder in folders:
|
|
86
|
+
to_folder = to_path + folder
|
|
87
|
+
text = to_folder.replace(destination, '/') + '/'
|
|
88
|
+
if not os.path.exists(to_folder):
|
|
89
|
+
os.mkdir(to_folder)
|
|
90
|
+
if verbose: print(f'BackingUp: {text}')
|
|
@@ -5,19 +5,14 @@ import pandas as pd
|
|
|
5
5
|
import oddments as odd
|
|
6
6
|
from cachegrab import sha256
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
day_of_week,
|
|
12
|
-
year_end,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
from clockwork.utils import convert_date_format_to_regex
|
|
8
|
+
import clockwork as cw
|
|
9
|
+
from clockwork.utils import \
|
|
10
|
+
convert_date_format_to_regex
|
|
16
11
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
12
|
+
from ..decorators import check_read_only
|
|
13
|
+
from ..exceptions import ReadOnlyError
|
|
19
14
|
|
|
20
|
-
from
|
|
15
|
+
from ..utils import (
|
|
21
16
|
trifurcate,
|
|
22
17
|
trifurcate_and_join,
|
|
23
18
|
is_file,
|
|
@@ -25,6 +20,9 @@ from .utils import (
|
|
|
25
20
|
get_cwd,
|
|
26
21
|
get_created_date,
|
|
27
22
|
get_modified_date,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from .utils import (
|
|
28
26
|
create_folder,
|
|
29
27
|
delete_folder,
|
|
30
28
|
)
|
|
@@ -408,7 +406,6 @@ class Folder(object):
|
|
|
408
406
|
return False
|
|
409
407
|
|
|
410
408
|
|
|
411
|
-
|
|
412
409
|
class Folders(Contents):
|
|
413
410
|
'''
|
|
414
411
|
Description
|
|
@@ -429,7 +426,6 @@ class Folder(object):
|
|
|
429
426
|
self.folder._clear_subfolder_cache()
|
|
430
427
|
|
|
431
428
|
|
|
432
|
-
|
|
433
429
|
class Files(Contents):
|
|
434
430
|
'''
|
|
435
431
|
Description
|
|
@@ -847,19 +843,19 @@ class Folder(object):
|
|
|
847
843
|
|
|
848
844
|
# helper functions adding timestamps to folders
|
|
849
845
|
def quarter(self, delta=0, **kwargs):
|
|
850
|
-
return self.join(quarter_end(delta=delta).label, **kwargs)
|
|
846
|
+
return self.join(cw.quarter_end(delta=delta).label, **kwargs)
|
|
851
847
|
|
|
852
848
|
|
|
853
849
|
def month(self, delta=0, **kwargs):
|
|
854
|
-
return self.join(month_end(delta=delta).ymd, **kwargs)
|
|
850
|
+
return self.join(cw.month_end(delta=delta).ymd, **kwargs)
|
|
855
851
|
|
|
856
852
|
|
|
857
853
|
def day(self, weekday, delta=0, **kwargs):
|
|
858
|
-
return self.join(day_of_week(weekday=weekday, delta=delta).ymd, **kwargs)
|
|
854
|
+
return self.join(cw.day_of_week(weekday=weekday, delta=delta).ymd, **kwargs)
|
|
859
855
|
|
|
860
856
|
|
|
861
857
|
def year(self, delta=0, **kwargs):
|
|
862
|
-
return self.join(str(year_end(delta=delta).year), **kwargs)
|
|
858
|
+
return self.join(str(cw.year_end(delta=delta).year), **kwargs)
|
|
863
859
|
|
|
864
860
|
|
|
865
861
|
#╭-------------------------------------------------------------------------╮
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
from ..utils import is_folder
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _verify_is_folder(func):
|
|
8
|
+
|
|
9
|
+
def wrapper(f):
|
|
10
|
+
f = str(f)
|
|
11
|
+
if not is_folder(f):
|
|
12
|
+
raise TypeError(f"{f} is not a folder")
|
|
13
|
+
return func(f)
|
|
14
|
+
|
|
15
|
+
return wrapper
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@_verify_is_folder
|
|
19
|
+
def create_folder(f):
|
|
20
|
+
''' create folder if it does not already exist '''
|
|
21
|
+
if not os.path.exists(f):
|
|
22
|
+
os.mkdir(f)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@_verify_is_folder
|
|
26
|
+
def delete_folder(f):
|
|
27
|
+
''' delete folder if it exists '''
|
|
28
|
+
if os.path.exists(f):
|
|
29
|
+
shutil.rmtree(f)
|
|
30
|
+
|
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
from ._file import
|
|
2
|
-
from ._folder import
|
|
3
|
-
|
|
4
|
-
from .
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
from ._sqlite import SQLiteFile
|
|
9
|
-
from .utils import trifurcate, get_cwd
|
|
1
|
+
from ._file import *
|
|
2
|
+
from ._folder import *
|
|
3
|
+
|
|
4
|
+
from .utils import (
|
|
5
|
+
trifurcate,
|
|
6
|
+
get_cwd
|
|
7
|
+
)
|
|
10
8
|
|
|
11
9
|
|
|
12
10
|
#╭-------------------------------------------------------------------------╮
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import datetime
|
|
3
|
+
from clockwork import Date
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def split_extension(x):
|
|
7
|
+
''' separates file extension from rest of string '''
|
|
8
|
+
dot_index = x.rfind('.')
|
|
9
|
+
if dot_index == -1: # period not found
|
|
10
|
+
return x, ''
|
|
11
|
+
else:
|
|
12
|
+
ext = x[dot_index + 1:]
|
|
13
|
+
rest = x[:dot_index]
|
|
14
|
+
return rest, ext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def is_file(f):
|
|
18
|
+
''' returns True if the argument is a file '''
|
|
19
|
+
return bool(trifurcate(f)[-1])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_folder(f):
|
|
23
|
+
''' returns True if the argument is a folder '''
|
|
24
|
+
f = trifurcate(f)
|
|
25
|
+
return f[0] and not any(f[1:])
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def trifurcate_and_join(f):
|
|
29
|
+
''' split argument into its components (folder inferred if absent) and
|
|
30
|
+
combine them into one string '''
|
|
31
|
+
folder, name, ext = trifurcate(f)
|
|
32
|
+
return (folder + name + '.' + ext) if ext else folder
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def trifurcate(f, default_folder=True):
|
|
36
|
+
''' split argument into folder, file name, and file extension components '''
|
|
37
|
+
f = str(f).replace('\\','/').strip()
|
|
38
|
+
if not f: raise ValueError("'f' argument cannot be empty")
|
|
39
|
+
explicitly_folder = f[-1] == '/'
|
|
40
|
+
f = '/'.join([x for x in f.split('/') if x])
|
|
41
|
+
f = f + '/' if explicitly_folder or '.' not in f.split('/')[-1] else f
|
|
42
|
+
if f.split('/')[0][-4:].lower() == '.com': f = '//' + f
|
|
43
|
+
f = f.rsplit('/', 1)
|
|
44
|
+
folder = f[0] + '/' if len(f) == 2 else (get_cwd() if default_folder else '')
|
|
45
|
+
name, ext = split_extension(f[-1])
|
|
46
|
+
return folder, name, ext.lower()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_cwd():
|
|
50
|
+
pypath = os.getenv('PYTHONPATH')
|
|
51
|
+
f = os.getcwd() if pypath is None else pypath.split(os.pathsep)[0]
|
|
52
|
+
f = str(f).replace('\\','/') + '/'
|
|
53
|
+
return f
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _timestamp_to_date(func):
|
|
57
|
+
|
|
58
|
+
def wrapper(path):
|
|
59
|
+
return Date(datetime.datetime.fromtimestamp(func(path)))
|
|
60
|
+
|
|
61
|
+
return wrapper
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@_timestamp_to_date
|
|
65
|
+
def get_created_date(path):
|
|
66
|
+
return os.path.getctime(path)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@_timestamp_to_date
|
|
70
|
+
def get_modified_date(path):
|
|
71
|
+
return os.path.getmtime(path)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "pathpilot"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "Library that facilitates file and folder manipulation in Python."
|
|
5
5
|
authors = ["Zachary Einck <zacharyeinck@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -10,9 +10,9 @@ homepage = "https://github.com/zteinck/pathpilot"
|
|
|
10
10
|
|
|
11
11
|
[tool.poetry.dependencies]
|
|
12
12
|
python = "^3.8"
|
|
13
|
-
clockwork = "
|
|
14
|
-
cachegrab = "
|
|
15
|
-
oddments = "
|
|
13
|
+
clockwork = ">=0.2.2"
|
|
14
|
+
cachegrab = ">=0.2.2"
|
|
15
|
+
oddments = ">=0.2.0"
|
|
16
16
|
pandas = "*"
|
|
17
17
|
numpy = "*"
|
|
18
18
|
XlsxWriter = "*"
|
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
import shutil
|
|
2
|
-
import filecmp
|
|
3
|
-
import math
|
|
4
|
-
import os
|
|
5
|
-
import datetime
|
|
6
|
-
import inspect
|
|
7
|
-
from clockwork import Date
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def split_extension(x):
|
|
11
|
-
''' separates file extention from rest of string '''
|
|
12
|
-
dot_index = x.rfind('.')
|
|
13
|
-
if dot_index == -1: # period not found
|
|
14
|
-
return x, ''
|
|
15
|
-
else:
|
|
16
|
-
ext = x[dot_index + 1:]
|
|
17
|
-
rest = x[:dot_index]
|
|
18
|
-
return rest, ext
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def is_file(f):
|
|
22
|
-
''' returns True if the argument is a file '''
|
|
23
|
-
return bool(trifurcate(f)[-1])
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def is_folder(f):
|
|
27
|
-
''' returns True if the argument is a folder '''
|
|
28
|
-
f = trifurcate(f)
|
|
29
|
-
return f[0] and not any(f[1:])
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def trifurcate_and_join(f):
|
|
33
|
-
''' split argument into its components (folder inferred if absent) and
|
|
34
|
-
combine them into one string '''
|
|
35
|
-
folder, name, ext = trifurcate(f)
|
|
36
|
-
return (folder + name + '.' + ext) if ext else folder
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def trifurcate(f, default_folder=True):
|
|
40
|
-
''' split argument into folder, file name, and file extension components '''
|
|
41
|
-
f = str(f).replace('\\','/').strip()
|
|
42
|
-
if not f: raise ValueError("'f' argument cannot be empty")
|
|
43
|
-
explicitly_folder = f[-1] == '/'
|
|
44
|
-
f = '/'.join([x for x in f.split('/') if x])
|
|
45
|
-
f = f + '/' if explicitly_folder or '.' not in f.split('/')[-1] else f
|
|
46
|
-
if f.split('/')[0][-4:].lower() == '.com': f = '//' + f
|
|
47
|
-
f = f.rsplit('/', 1)
|
|
48
|
-
folder = f[0] + '/' if len(f) == 2 else (get_cwd() if default_folder else '')
|
|
49
|
-
name, ext = split_extension(f[-1])
|
|
50
|
-
return folder, name, ext.lower()
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def get_cwd():
|
|
54
|
-
pypath = os.getenv('PYTHONPATH')
|
|
55
|
-
f = os.getcwd() if pypath is None else pypath.split(os.pathsep)[0]
|
|
56
|
-
f = str(f).replace('\\','/') + '/'
|
|
57
|
-
return f
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def get_size_label(size_in_bytes, decimal_places=2):
|
|
61
|
-
|
|
62
|
-
units = ('','K','M','G','T','P','E','Z','Y')
|
|
63
|
-
conversion_factor = 1024
|
|
64
|
-
|
|
65
|
-
if size_in_bytes == 0:
|
|
66
|
-
index, size = 0, 0
|
|
67
|
-
else:
|
|
68
|
-
index = int(math.floor(math.log(size_in_bytes, conversion_factor)))
|
|
69
|
-
size = size_in_bytes / math.pow(conversion_factor, index)
|
|
70
|
-
|
|
71
|
-
return f'{size:,.{decimal_places}f} {units[index]}B'
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def backup_folder(origin, destination, overwrite=True, shallow=True, verbose=True):
|
|
75
|
-
'''
|
|
76
|
-
Description
|
|
77
|
-
------------
|
|
78
|
-
Backs up the the folders and files in the 'origin' folder to the
|
|
79
|
-
'destination' folder. Files in 'destination' are overwritten if they are
|
|
80
|
-
different than files of the same name in 'origin' according to filecmp.cmp
|
|
81
|
-
(e.g. doc.xlsx exists in both directories but the version in 'origin' was
|
|
82
|
-
updated since the last time a backup was performed.)
|
|
83
|
-
|
|
84
|
-
Parameters
|
|
85
|
-
------------
|
|
86
|
-
origin : str | Folder
|
|
87
|
-
folder to backup
|
|
88
|
-
destination : str | Folder
|
|
89
|
-
backup folder
|
|
90
|
-
overwrite : bool
|
|
91
|
-
if True, if the destination file already exists and it is different than
|
|
92
|
-
the origin file, it will be overwritten.
|
|
93
|
-
If False, overlapping files are ignored.
|
|
94
|
-
shallow : bool
|
|
95
|
-
filecmp.cmp(f1, f2, shallow=True) shallow argument.
|
|
96
|
-
"If shallow is true and the os.stat() signatures (file type, size, and
|
|
97
|
-
modification time) of both files are identical, the files are taken to be
|
|
98
|
-
equal."
|
|
99
|
-
https://docs.python.org/3/library/filecmp.html
|
|
100
|
-
verbose : bool
|
|
101
|
-
if True, all folders and files that were backed up or overwritten are printed.
|
|
102
|
-
|
|
103
|
-
Returns
|
|
104
|
-
------------
|
|
105
|
-
None
|
|
106
|
-
'''
|
|
107
|
-
|
|
108
|
-
def format_path(path):
|
|
109
|
-
path = str(path).replace('\\','/')
|
|
110
|
-
if path[-1] != '/': path = path + '/'
|
|
111
|
-
return path
|
|
112
|
-
|
|
113
|
-
origin = format_path(origin)
|
|
114
|
-
destination = format_path(destination)
|
|
115
|
-
if not os.path.exists(destination): os.mkdir(destination)
|
|
116
|
-
|
|
117
|
-
for path, folders, files in os.walk(origin):
|
|
118
|
-
from_path = format_path(path)
|
|
119
|
-
to_path = from_path.replace(origin, destination)
|
|
120
|
-
|
|
121
|
-
for file in files:
|
|
122
|
-
copy_file = False
|
|
123
|
-
from_file = from_path + file
|
|
124
|
-
to_file = to_path + file
|
|
125
|
-
text = to_file.replace(destination, '/')
|
|
126
|
-
|
|
127
|
-
if os.path.exists(to_file):
|
|
128
|
-
if not filecmp.cmp(
|
|
129
|
-
from_file,
|
|
130
|
-
to_file,
|
|
131
|
-
shallow=shallow
|
|
132
|
-
) and overwrite:
|
|
133
|
-
action = 'Overwrite'
|
|
134
|
-
copy_file = True
|
|
135
|
-
else:
|
|
136
|
-
action = 'BackingUp'
|
|
137
|
-
copy_file = True
|
|
138
|
-
|
|
139
|
-
if copy_file:
|
|
140
|
-
try:
|
|
141
|
-
shutil.copyfile(from_file, to_file)
|
|
142
|
-
if verbose: print(f'{action}: {text}')
|
|
143
|
-
except Exception as e:
|
|
144
|
-
if verbose: print(e)
|
|
145
|
-
|
|
146
|
-
for folder in folders:
|
|
147
|
-
to_folder = to_path + folder
|
|
148
|
-
text = to_folder.replace(destination, '/') + '/'
|
|
149
|
-
if not os.path.exists(to_folder):
|
|
150
|
-
os.mkdir(to_folder)
|
|
151
|
-
if verbose: print(f'BackingUp: {text}')
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def timestamp_to_date(func):
|
|
155
|
-
|
|
156
|
-
def wrapper(path):
|
|
157
|
-
return Date(datetime.datetime.fromtimestamp(func(path)))
|
|
158
|
-
|
|
159
|
-
return wrapper
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
@timestamp_to_date
|
|
163
|
-
def get_created_date(path):
|
|
164
|
-
return os.path.getctime(path)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
@timestamp_to_date
|
|
168
|
-
def get_modified_date(path):
|
|
169
|
-
return os.path.getmtime(path)
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def verify_is_folder(func):
|
|
173
|
-
|
|
174
|
-
def wrapper(f):
|
|
175
|
-
f = str(f)
|
|
176
|
-
if not is_folder(f):
|
|
177
|
-
raise TypeError(f"{f} is not a folder")
|
|
178
|
-
return func(f)
|
|
179
|
-
|
|
180
|
-
return wrapper
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
@verify_is_folder
|
|
184
|
-
def create_folder(f):
|
|
185
|
-
''' create folder if it does not already exist '''
|
|
186
|
-
if not os.path.exists(f):
|
|
187
|
-
os.mkdir(f)
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
@verify_is_folder
|
|
191
|
-
def delete_folder(f):
|
|
192
|
-
''' delete folder if it exists '''
|
|
193
|
-
if os.path.exists(f):
|
|
194
|
-
shutil.rmtree(f)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
def get_object_folder(obj):
|
|
198
|
-
''' return file folder of Python object '''
|
|
199
|
-
return os.path.absfolder(inspect.getfile(obj))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|