feilian-1.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
feilian/__init__.py ADDED
@@ -0,0 +1,31 @@
+ # -*- coding: utf-8 -*-
+
+ from .io import ensure_parent_dir_exist
+ from .dataframe import read_dataframe, save_dataframe, extract_dataframe_sample, merge_dataframe_rows, iter_dataframe
+ from .dataframe import is_empty_text, is_nonempty_text, is_blank_text, is_non_blank_text
+ from .datetime import format_time, format_date
+ from .arg import ArgValueParser
+ from .json import read_json, save_json, write_json, read_big_json
+ from .txt import (
+     detect_stream_encoding, detect_file_encoding, get_file_encoding,
+     read_txt, save_txt, write_txt,
+ )
+ from .process import DataframeProcessor
+ from .excel import save_excel, write_excel
+ from .utils import flatten_dict, flatten_list
+ from .version import __version__
+
+ __all__ = [
+     'ensure_parent_dir_exist',
+     'read_dataframe', 'save_dataframe', 'extract_dataframe_sample', 'merge_dataframe_rows', 'iter_dataframe',
+     'is_empty_text', 'is_nonempty_text', 'is_blank_text', 'is_non_blank_text',
+     'format_time', 'format_date',
+     'ArgValueParser',
+     'read_json', 'save_json', 'write_json', 'read_big_json',
+     'detect_stream_encoding', 'detect_file_encoding', 'get_file_encoding',
+     'read_txt', 'save_txt', 'write_txt',
+     'save_excel', 'write_excel',
+     'DataframeProcessor',
+     'flatten_dict', 'flatten_list',
+     '__version__',
+ ]
feilian/_dist_ver.py ADDED
@@ -0,0 +1,5 @@
+ # -*- coding: utf-8 -*-
+ # file generated by setuptools_scm
+ # don't change, don't track in version control
+ VERSION = (1, 3, 4)
+ __version__ = '1.3.4'
feilian/_typing.py ADDED
@@ -0,0 +1,5 @@
+ from typing import *
+ try:
+     from typing import Literal
+ except ImportError:
+     from typing_extensions import Literal
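
`typing.Literal` was added in Python 3.8, so on older interpreters this fallback requires the third-party `typing_extensions` package. The point is to do the version juggling in one place; the other modules then import it uniformly, as `arg.py` and `dataframe.py` below do:

    from ._typing import Literal

    Mode = Literal['always_na', 'never_na', 'is_none']  # resolves from typing on 3.8+, typing_extensions otherwise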
feilian/arg.py ADDED
@@ -0,0 +1,179 @@
+ # -*- coding: utf-8 -*-
+
+ from ._typing import (
+     Union, List, Any, Iterable,
+     Callable, Set, Optional, Tuple,
+     Dict, Hashable, Sequence,
+     Literal,
+ )
+
+ _build_in_na_checkers = {
+     'always_na': lambda x: True,
+     'never_na': lambda x: False,
+     'is_none': lambda x: x is None,
+ }
+ _NA_CHECKER_TYPES = Union[Callable[[Any], bool], Literal['always_na', 'never_na', 'is_none']]
+
+ _build_in_na_converters = {
+     'none': lambda x: None,
+     'self': lambda x: x,
+     'empty': lambda x: [],
+     'single': lambda x: [x],
+ }
+ _NA_CONVERTER_TYPES = Union[Callable[[Any], Any], Literal['none', 'self', 'empty', 'single']]
+
+ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any) -> Any:
+     if value is None:
+         return mapping[default_key]
+     return mapping.get(value, value)
+
+ class ArgValueParser(object):
+     @classmethod
+     def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
+                              func: Optional[Callable[[str], Any]] = None,
+                              sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
+         """
+         split and parse multiple string values
+         :param strings: a string, or a sequence of strings
+         :param func: function to parse a single string value
+         :param sep: separator used to split a single string
+         :param do_trim: trim every word or not
+         :param ignore_blank: ignore blank words or not; in some cases this must be `True`
+         """
+         if isinstance(strings, str):
+             strings = [strings]
+         if strings:
+             for value in strings:
+                 for x in value.split(sep):
+                     if do_trim:
+                         x = x.strip()
+                     if not x and ignore_blank:
+                         continue
+                     yield func(x) if func else x
+
+     @classmethod
+     def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
+                           func: Optional[Callable[[str], Any]] = None,
+                           sep=',', do_trim=True, ignore_blank=True) -> Set[Any]:
+         """
+         split multiple string values into a set of words
+         """
+         return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
+
+     @classmethod
+     def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
+                            func: Optional[Callable[[str], Any]] = None,
+                            sep=',', do_trim=True, ignore_blank=True) -> List[Any]:
+         """
+         split multiple string values into a list of words
+         """
+         return list(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
+
+     @staticmethod
+     def bound_collection_if_singleton(value: Any, collection_type: type,
+                                       elem_type: Union[type, Tuple[type, ...]] = (),
+                                       allowed_type=(list, tuple, set)) -> Any:
+         """
+         if `value` is a singleton element, wrap it in a collection type.
+         :param value: the input value that may be wrapped
+         :param collection_type: may be `list`, `tuple` or `set`
+         :param elem_type: if `value` is an instance of `elem_type` but not of `allowed_type`, wrap it.
+         :param allowed_type: if `value` is an instance of `allowed_type`, return `value` unchanged.
+         """
+         if isinstance(value, allowed_type):
+             return value
+         if isinstance(value, elem_type):
+             return collection_type([value])
+         return value
+
+     @classmethod
+     def bound_list_if_singleton(cls, value: Any, elem_type: Union[type, Tuple[type, ...]] = (),
+                                 allowed_type=(list, tuple, set)) -> Any:
+         """
+         If `value` is a singleton element, wrap it in a `list`.
+         See more arg docs in `bound_collection_if_singleton()`.
+         """
+         return cls.bound_collection_if_singleton(value, collection_type=list,
+                                                  elem_type=elem_type, allowed_type=allowed_type)
+
+     @classmethod
+     def bound_tuple_if_singleton(cls, value: Any, elem_type: Union[type, Tuple[type, ...]] = (),
+                                  allowed_type=(list, tuple, set)) -> Any:
+         """
+         If `value` is a singleton element, wrap it in a `tuple`.
+         See more arg docs in `bound_collection_if_singleton()`.
+         """
+         return cls.bound_collection_if_singleton(value, collection_type=tuple,
+                                                  elem_type=elem_type, allowed_type=allowed_type)
+
+     @classmethod
+     def bound_set_if_singleton(cls, value: Any, elem_type: Union[type, Tuple[type, ...]] = (),
+                                allowed_type=(list, tuple, set)) -> Any:
+         """
+         If `value` is a singleton element, wrap it in a `set`.
+         See more arg docs in `bound_collection_if_singleton()`.
+         """
+         return cls.bound_collection_if_singleton(value, collection_type=set,
+                                                  elem_type=elem_type, allowed_type=allowed_type)
+
+     @staticmethod
+     def ensure_collection(value: Any, expected_type: type, collection_type: Union[type, Tuple[type, ...]],
+                           na_checker: _NA_CHECKER_TYPES = None, na_converter: _NA_CONVERTER_TYPES = None) -> Any:
+         """
+         Ensure the value is a list, tuple or set.
+         :param value: a value of any type
+         :param expected_type: expected return type, can be list, tuple or set
+         :param collection_type: other collection types to be converted
+         :param na_checker: checks whether `value` is na, default is 'is_none';
+             a str value selects one of the built-in checkers:
+                 always_na: always treat the value as na
+                 never_na: never treat the value as na
+                 is_none: test whether the value is `None`
+         :param na_converter: if `value` is na, return the output of this function, default is 'self';
+             a str value selects one of the built-in converters:
+                 none: `None`
+                 self: the value unchanged
+                 empty: an empty list
+                 single: a single-value list: `[value]`
+         :return: expected to be an instance of `expected_type`, or `None` under some conditions
+         """
+         na_checker = _get_or_default(na_checker, _build_in_na_checkers, 'is_none')
+         if na_checker(value):
+             na_converter = _get_or_default(na_converter, _build_in_na_converters, 'self')
+             return na_converter(value)
+         if isinstance(value, expected_type):
+             return value
+         if isinstance(value, collection_type):
+             return expected_type(value)
+         return expected_type([value])
+
+     @classmethod
+     def ensure_list(cls, value: Any, na_checker: _NA_CHECKER_TYPES = None,
+                     na_converter: _NA_CONVERTER_TYPES = None) -> Optional[List[Any]]:
+         """
+         Ensure the value is a list.
+         See more arg docs in `ensure_collection()`.
+         """
+         return cls.ensure_collection(value, expected_type=list, collection_type=(tuple, set),
+                                      na_checker=na_checker, na_converter=na_converter)
+
+     @classmethod
+     def ensure_tuple(cls, value: Any, na_checker: _NA_CHECKER_TYPES = None,
+                      na_converter: _NA_CONVERTER_TYPES = None) -> Optional[Tuple[Any, ...]]:
+         """
+         Ensure the value is a tuple.
+         See more arg docs in `ensure_collection()`.
+         """
+         return cls.ensure_collection(value, expected_type=tuple, collection_type=(list, set),
+                                      na_checker=na_checker, na_converter=na_converter)
+
+     @classmethod
+     def ensure_set(cls, value: Any, na_checker: _NA_CHECKER_TYPES = None,
+                    na_converter: _NA_CONVERTER_TYPES = None) -> Optional[Set[Any]]:
+         """
+         Ensure the value is a set.
+         See more arg docs in `ensure_collection()`.
+         """
+         return cls.ensure_collection(value, expected_type=set, collection_type=(list, tuple),
+                                      na_checker=na_checker, na_converter=na_converter)
+
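
A usage sketch of `ArgValueParser` (all values are illustrative):

    from feilian import ArgValueParser

    ArgValueParser.split_strs_to_list(['a, b', 'c,,d'])     # ['a', 'b', 'c', 'd']
    ArgValueParser.split_strs_to_set('1,2,2', func=int)     # {1, 2}
    ArgValueParser.ensure_list('x')                         # ['x']
    ArgValueParser.ensure_list(None, na_converter='empty')  # []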
feilian/dataframe.py ADDED
@@ -0,0 +1,311 @@
+ # -*- coding: utf-8 -*-
+
+ """
+ Encapsulate methods for pandas `DataFrame`.
+ """
+
+ import io
+ import os
+ import pathlib
+ import pandas as pd
+ import random
+ import collections
+ from ._typing import Union, Iterable, Dict, List, Any, Sequence, Callable, Tuple, Hashable, Literal
+ from .io import ensure_parent_dir_exist
+ from .txt import detect_stream_encoding, detect_file_encoding
+
+ # Compatible with different pandas versions: `to_csv()` renamed `line_terminator` to `lineterminator` in pandas 1.5
+ PD_PARAM_NEWLINE = 'lineterminator'
+ pd_version = [int(x) for x in pd.__version__.split('.')]
+ if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
+     PD_PARAM_NEWLINE = 'line_terminator'
+
+ FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
+ COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
+
+ def _drop_na_values(data: Union[pd.DataFrame, Dict[str, pd.DataFrame]], axis: Literal['columns', 'rows']):
+     if isinstance(data, pd.DataFrame):
+         data.dropna(axis=axis, how='all', inplace=True)
+     else:
+         assert isinstance(data, dict)
+         for df in data.values():
+             df.dropna(axis=axis, how='all', inplace=True)
+
+ def _infer_file_format(file) -> str:
+     if isinstance(file, pd.ExcelWriter):
+         return 'xlsx'
+     elif isinstance(file, str):
+         return os.path.splitext(file)[1].lower()[1:]
+     elif isinstance(file, pathlib.PurePath):
+         suf = file.suffix.lower()
+         return suf[1:] if suf.startswith('.') else suf
+     elif isinstance(file, os.PathLike):
+         return os.path.splitext(os.fspath(file))[1].lower().lstrip('.')
+     else:
+         raise ValueError(f"Cannot infer format for type: {type(file)}")
+
+ def read_dataframe(file: Union[str, os.PathLike, io.IOBase], *args, sheet_name=0,
+                    file_format: FILE_FORMAT = None, encoding='auto',
+                    jsonl=False, dtype: type = None,
+                    drop_na_columns=False, drop_na_rows=False,
+                    **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
+     """
+     read a file as a pandas `DataFrame`
+     :param file: the file to be read
+     :param args: extra args for `pd.read_xx()`
+     :param sheet_name: `sheet_name` for `pd.read_excel()`
+     :param file_format: csv, tsv, json, xlsx, parquet
+     :param encoding: text file encoding
+     :param jsonl: jsonl format or not, only used for the json format
+     :param dtype: `dtype` for `pd.read_xx()`
+     :param drop_na_columns: drop a column if all values of the column are na
+     :param drop_na_rows: drop a row if all values of the row are na
+     :param kwargs: extra kwargs for `pd.read_xx()`
+     """
+     # decide the file format
+     if not file_format:
+         file_format = _infer_file_format(file)
+
+     for key in ['lines', 'line_delimited_json_format']:
+         if key in kwargs and kwargs.pop(key):
+             jsonl = True
+
+     # handle special formats
+     if file_format == 'tsv':
+         # a tsv file is just a csv file with tab delimiters
+         file_format = 'csv'
+         if 'sep' in kwargs:
+             kwargs.pop('sep')
+         kwargs['delimiter'] = '\t'
+     elif file_format == 'jsonl':
+         file_format = 'json'
+         jsonl = True
+
+     # detect encoding
+     if encoding == 'auto' and file_format in ['csv', 'json']:
+         if isinstance(file, (str, os.PathLike)):
+             encoding = detect_file_encoding(file)
+         elif isinstance(file, io.IOBase) and file.seekable():
+             tell = file.tell()
+             encoding = detect_stream_encoding(file)
+             file.seek(tell)
+         else:
+             # reading the stream would consume its content, so the encoding cannot be detected
+             encoding = None
+
+     if file_format == 'csv':
+         df = pd.read_csv(file, *args, encoding=encoding, dtype=dtype, **kwargs)
+     elif file_format == 'xlsx':
+         df = pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
+     elif file_format == 'json':
+         try:
+             df = pd.read_json(file, *args, encoding=encoding, lines=jsonl, dtype=dtype, **kwargs)
+         except Exception as e:
+             # if it failed, try again with the `lines` arg flipped
+             try:
+                 df = pd.read_json(file, *args, lines=not jsonl, dtype=dtype, **kwargs)
+             except Exception:
+                 raise e
+     elif file_format == 'parquet':
+         df = pd.read_parquet(file, *args, **kwargs)
+     else:
+         raise IOError(f"Unknown file format: {file_format}")
+
+     if drop_na_columns:
+         _drop_na_values(df, axis='columns')
+     if drop_na_rows:
+         _drop_na_values(df, axis='rows')
+
+     return df
+
+ def save_dataframe(file: Union[str, os.PathLike, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]'],
+                    df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
+                    *args, sheet_name='Sheet1',
+                    file_format: FILE_FORMAT = None,
+                    compression: COMPRESSION_FORMAT = None,
+                    index=False, index_label=None,
+                    encoding='utf-8', newline='\n',
+                    force_ascii=False,
+                    orient='records', jsonl=True, indent=None,
+                    column_mapper: Union[Dict[str, str], Sequence[str]] = None,
+                    include_columns: Sequence[str] = None,
+                    exclude_columns: Sequence[str] = None,
+                    **kwargs):
+     """
+     save data into a file
+     :param file: where to save the data
+     :param df: the data
+     :param args: extra args for `df.to_xx()`
+     :param sheet_name: `sheet_name` for the excel format
+     :param file_format: csv, tsv, json, xlsx, parquet
+     :param compression: name of the compression to use;
+         use `None` for no compression.
+     :param index: save the index or not, see docs in `df.to_csv()`;
+         if set as a str and `index_label` is not set, `index_label` will be set to this value
+     :param index_label: header for the index when `index` is `True`
+     :param encoding: text file encoding
+     :param newline: text file newline
+     :param force_ascii: `force_ascii` for the json format
+     :param orient: `orient` for the json format
+     :param jsonl: jsonl format or not
+     :param indent: indent for the json format
+     :param column_mapper: rename columns with the given mapping
+     :param include_columns: if set, columns not listed here will be ignored
+     :param exclude_columns: if set, columns listed here will be ignored
+     :param kwargs: extra kwargs for `df.to_xx()`
+     """
+     # decide the file format
+     if not file_format:
+         file_format = _infer_file_format(file)
+
+     # convert the data to a dataframe
+     if not isinstance(df, pd.DataFrame):
+         df = pd.DataFrame(df)
+
+     for key in ['lines', 'line_delimited_json_format']:
+         if key in kwargs and kwargs.pop(key):
+             jsonl = True
+
+     # deal with columns
+     if column_mapper:
+         df = df.rename(columns=column_mapper)
+     if exclude_columns:
+         df = df.drop(exclude_columns, axis=1)
+     if include_columns:
+         df = df.reindex(include_columns, axis=1)
+
+     # ensure the parent dir exists
+     if isinstance(file, (str, os.PathLike)):
+         ensure_parent_dir_exist(file)
+
+     # allow setting the index label just through the arg `index`
+     if index_label is None and isinstance(index, str):
+         index, index_label = True, index
+
+     # handle special formats
+     if file_format == 'tsv':
+         # a tsv is actually a csv with tab separators
+         file_format = 'csv'
+         kwargs['sep'] = '\t'
+     elif file_format == 'jsonl':
+         file_format = 'json'
+         jsonl = True
+
+     # save to file according to the format
+     if file_format == 'csv':
+         kwargs[PD_PARAM_NEWLINE] = newline
+         df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
+                   encoding=encoding, **kwargs)
+     elif file_format == 'xlsx':
+         df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
+     elif file_format == 'json':
+         if jsonl:
+             orient = 'records'
+             indent = None
+         if orient not in ['split', 'table']:
+             index = True
+         df.to_json(file, *args, compression=compression, index=index,
+                    force_ascii=force_ascii, orient=orient, lines=jsonl,
+                    indent=indent, **kwargs)
+     elif file_format == 'parquet':
+         df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
+     else:
+         raise IOError(f"Unknown file format: {file_format}")
+
+ def iter_dataframe(data: pd.DataFrame,
+                    progress_bar: Union[bool, str, 'tqdm', Callable[[Iterable[Any]], 'tqdm']] = False
+                    ) -> Iterable[Tuple[Hashable, pd.Series]]:
+     """
+     iterate over dataframe rows, optionally showing a progress bar
+     :param data: dataframe
+     :param progress_bar: show a progress bar or not;
+         if set to a non-empty string, it will be used as the progress bar description
+     """
+     rows = data.iterrows()
+     if progress_bar:
+         from tqdm import tqdm
+         if isinstance(progress_bar, tqdm):
+             progress_bar.iterable = rows
+             rows = progress_bar
+         elif isinstance(progress_bar, str):
+             rows = tqdm(rows, total=len(data), desc=progress_bar)
+         elif callable(progress_bar):
+             rows = progress_bar(rows)
+         else:
+             rows = tqdm(rows, total=len(data))
+     return rows
+
+ def extract_dataframe_sample(data: pd.DataFrame,
+                              filter_func: Callable[[pd.Series], bool],
+                              size=0, shuffle=False,
+                              return_format: Literal['df', 'dataframe', 'list'] = 'dataframe',
+                              progress_bar=False) -> Union[pd.DataFrame, List[pd.Series]]:
+     """
+     extract a sample from a dataframe
+     :param data: original data
+     :param filter_func: bool function; returning `True` keeps the row
+     :param size: max size of the result; 0 means no limit
+     :param shuffle: shuffle the result or not
+     :param progress_bar: passed to `iter_dataframe()`
+     :param return_format: one of {'dataframe', 'list'}
+     """
+     result = [row for _, row in iter_dataframe(data, progress_bar=progress_bar) if filter_func(row)]
+     if shuffle:
+         random.shuffle(result)
+     if 0 < size < len(result):
+         result = result[:size]
+     if return_format == 'df' or return_format == 'dataframe':
+         try:
+             return pd.DataFrame(result)
+         except pd.errors.InvalidIndexError:
+             return pd.DataFrame([{k: v for k, v in x.items()} for x in result])
+     elif return_format == 'list':
+         return result
+     raise ValueError("Param 'return_format' should be one of {'dataframe', 'list'}.")
+
+ def is_empty_text(s: str) -> bool:
+     return pd.isna(s) or not s
+
+ def is_nonempty_text(s: str) -> bool:
+     return pd.notna(s) and isinstance(s, str) and bool(s)
+
+ def is_blank_text(s: str) -> bool:
+     return pd.isna(s) or (isinstance(s, str) and not s.strip())
+
+ def is_non_blank_text(s: str) -> bool:
+     return pd.notna(s) and isinstance(s, str) and bool(s.strip())
+
+ def join_values(values: Sequence[Any], sep=None) -> Union[str, Sequence[Any]]:
+     if not values:
+         return ''
+     if len(values) == 1:
+         return str(values[0])
+     return sep.join(map(str, values)) if sep else values
+
+ def merge_dataframe_rows(data: pd.DataFrame, col_id='ID', na=None, join_sep=None, progress_bar=False) -> pd.DataFrame:
+     """
+     merge rows with the same id into one row, similar to `GROUP BY` in sql
+     :param data: original data
+     :param col_id: column name of the id column
+     :param na: values to be treated as na
+     :param join_sep: separator used to join multiple values
+     :param progress_bar: passed to `iter_dataframe()`
+     """
+     if na is None:
+         na = set()
+     elif isinstance(na, str):
+         na = {na}
+     else:
+         na = set(na)
+     counts = collections.defaultdict(lambda: collections.defaultdict(collections.Counter))
+     rows = iter_dataframe(data, progress_bar=progress_bar)
+     for i, row in rows:
+         eid = row[col_id]
+         for k, v in row.items():
+             if pd.notna(v) and v not in na:
+                 counts[eid][k][v] += 1
+     result = []
+     for x in counts.values():
+         item = {col: join_values(list(values.keys()), sep=join_sep) for col, values in x.items()}
+         result.append(item)
+     return pd.DataFrame(result)
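
A round-trip sketch of the dataframe helpers (the file path is hypothetical):

    import pandas as pd
    from feilian import save_dataframe, read_dataframe, merge_dataframe_rows

    df = pd.DataFrame([{'ID': 1, 'tag': 'a'}, {'ID': 1, 'tag': 'b'}, {'ID': 2, 'tag': 'c'}])
    save_dataframe('data/tags.jsonl', df)     # jsonl format inferred from the extension
    same = read_dataframe('data/tags.jsonl')  # encoding detected automatically
    merged = merge_dataframe_rows(df, col_id='ID', join_sep='|')  # the tags of ID 1 merge to 'a|b'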
feilian/datetime.py ADDED
@@ -0,0 +1,21 @@
+ # -*- coding: utf-8 -*-
+
+ from ._typing import Union
+ import pandas as pd
+ import datetime
+
+ def format_time(time: Union[str, int, float, datetime.datetime] = None, fmt='%Y-%m-%d %H:%M:%S') -> str:
+     if time is None:
+         time = datetime.datetime.now()
+     elif isinstance(time, (int, float)):
+         time = datetime.datetime.fromtimestamp(time)
+     elif isinstance(time, str):
+         time = pd.to_datetime(time)
+     else:
+         if not isinstance(time, datetime.datetime):
+             raise ValueError(f"Unexpected type: {type(time)}")
+     return time.strftime(fmt)
+
+ # when formatting a date, using no separator at all is the most common case
+ def format_date(date: Union[str, int, float, datetime.datetime] = None, sep='') -> str:
+     return format_time(date, fmt=sep.join(['%Y', '%m', '%d']))
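
For instance (results of the no-argument call depend on the current date):

    from feilian import format_time, format_date

    format_time('2024-01-02 03:04:05')  # '2024-01-02 03:04:05'
    format_date('2024-01-02')           # '20240102'
    format_date(sep='-')                # today, e.g. '2024-01-02'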
feilian/excel.py ADDED
@@ -0,0 +1,83 @@
+ import pandas as pd
+ from ._typing import Union, Iterable, Dict, Sequence, Any, List, Tuple
+ from .dataframe import save_dataframe
+
+ def _save_excel(file, df, *args, **kwargs):
+     # if df is a list of dataframes, save each dataframe into its own sheet
+     if isinstance(df, (list, tuple)) and df and all(isinstance(x, pd.DataFrame) for x in df):
+         if 'sheet_name' in kwargs:
+             kwargs.pop('sheet_name')
+         with pd.ExcelWriter(file) as writer:
+             for i, x in enumerate(df, 1):
+                 save_dataframe(writer, x, *args, sheet_name=f"Sheet{i}", **kwargs)
+     elif isinstance(df, dict) and df and all(isinstance(x, pd.DataFrame) for x in df.values()):
+         if 'sheet_name' in kwargs:
+             kwargs.pop('sheet_name')
+         with pd.ExcelWriter(file) as writer:
+             for name, x in df.items():
+                 save_dataframe(writer, x, *args, sheet_name=name, **kwargs)
+     else:
+         return save_dataframe(file, df, *args, **kwargs)
+
+ _FILE_TYPES = Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]']
+ _DATA_TYPES = Union[
+     pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]],
+     List[pd.DataFrame], Tuple[pd.DataFrame, ...], Dict[str, pd.DataFrame]
+ ]
+
+ def save_excel(file: _FILE_TYPES, df: _DATA_TYPES,
+                *args, sheet_name='Sheet1',
+                header: Union[Sequence[str], bool] = True,
+                index=False, index_label=None,
+                column_mapper: Union[Dict[str, str], Sequence[str]] = None,
+                include_columns: Sequence[str] = None,
+                exclude_columns: Sequence[str] = None,
+                **kwargs):
+     """
+     save data into a file
+     :param file: where to save the data
+     :param df: the data; a list or dict of dataframes is saved as multiple sheets
+     :param args: extra args for `df.to_excel()`
+     :param sheet_name: `sheet_name` for the excel format
+     :param header: `header` for the excel format
+     :param index: save the index or not, see docs in `df.to_excel()`;
+         if set as a str and `index_label` is not set, `index_label` will be set to this value
+     :param index_label: header for the index when `index` is `True`
+     :param column_mapper: rename columns with the given mapping
+     :param include_columns: if set, columns not listed here will be ignored
+     :param exclude_columns: if set, columns listed here will be ignored
+     :param kwargs: extra kwargs for `df.to_excel()`
+     """
+     _save_excel(
+         file, df, *args,
+         sheet_name=sheet_name,
+         header=header,
+         index=index,
+         index_label=index_label,
+         column_mapper=column_mapper,
+         include_columns=include_columns,
+         exclude_columns=exclude_columns,
+         **kwargs
+     )
+
+ def write_excel(
+     file: _FILE_TYPES, df: _DATA_TYPES,
+     *args, sheet_name='Sheet1',
+     header: Union[Sequence[str], bool] = True,
+     index=False, index_label=None,
+     column_mapper: Union[Dict[str, str], Sequence[str]] = None,
+     include_columns: Sequence[str] = None,
+     exclude_columns: Sequence[str] = None,
+     **kwargs
+ ):
+     save_excel(
+         file, df, *args,
+         sheet_name=sheet_name,
+         header=header,
+         index=index,
+         index_label=index_label,
+         column_mapper=column_mapper,
+         include_columns=include_columns,
+         exclude_columns=exclude_columns,
+         **kwargs
+     )
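
A multi-sheet sketch (sheet names and file path are hypothetical):

    import pandas as pd
    from feilian import save_excel

    report = {
        'summary': pd.DataFrame([{'total': 3}]),
        'detail': pd.DataFrame([{'id': 1}, {'id': 2}, {'id': 3}]),
    }
    save_excel('out/report.xlsx', report)  # each dict key becomes a sheet name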
feilian/io.py ADDED
@@ -0,0 +1,6 @@
+ # -*- coding: utf-8 -*-
+
+ import os
+
+ def ensure_parent_dir_exist(filepath: str):
+     os.makedirs(os.path.abspath(os.path.dirname(filepath)), exist_ok=True)
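
The writers above call this helper before writing; it can also be used directly (the path is hypothetical):

    from feilian import ensure_parent_dir_exist

    ensure_parent_dir_exist('out/nested/result.csv')  # creates out/nested/ if missing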