feilian 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of feilian might be problematic.
- feilian/__init__.py +2 -0
- feilian/_dist_ver.py +2 -2
- feilian/arg.py +9 -5
- feilian/dataframe.py +28 -9
- feilian/json.py +15 -4
- feilian/utils.py +64 -0
- feilian/version.py +1 -1
- {feilian-1.1.7.dist-info → feilian-1.1.9.dist-info}/METADATA +1 -1
- feilian-1.1.9.dist-info/RECORD +14 -0
- {feilian-1.1.7.dist-info → feilian-1.1.9.dist-info}/WHEEL +1 -1
- feilian-1.1.7.dist-info/RECORD +0 -13
- {feilian-1.1.7.dist-info → feilian-1.1.9.dist-info}/top_level.txt +0 -0
feilian/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .dataframe import is_empty_text, is_nonempty_text, is_blank_text, is_non_bl
 from .datetime import format_time, format_date
 from .arg import ArgValueParser
 from .json import read_json, save_json
+from .utils import flatten_dict
 from .version import __version__

 __all__ = [
@@ -15,5 +16,6 @@ __all__ = [
     'format_time', 'format_date',
     'ArgValueParser',
     'read_json', 'save_json',
+    'flatten_dict',
     '__version__',
 ]

feilian/_dist_ver.py
CHANGED
feilian/arg.py
CHANGED
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-

-from typing import
+from typing import (
+    Union, List, Any, Iterable,
+    Callable, Set, Optional, Tuple,
+    Dict, Hashable, Sequence,
+)
 try:
     from typing import Literal
 except ImportError:
@@ -28,12 +32,12 @@ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any)

 class ArgValueParser(object):
     @classmethod
-    def split_and_parse_strs(cls, strings: Union[
+    def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
                              func: Callable[[str], Any] = None,
                              sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
         """
         split and parse multi string values
-        :param strings:
+        :param strings: sequence of strings
         :param func: function to parse single string value
         :param sep: seperator to split single string
         :param do_trim: trim every word or not
@@ -51,7 +55,7 @@ class ArgValueParser(object):
                 yield func(x) if func else x

     @classmethod
-    def split_strs_to_set(cls, values: Union[
+    def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
                           func: Callable[[str], Any] = None,
                           sep=',', do_trim=True, ignore_blank=True) -> Optional[Set[Any]]:
         """
@@ -60,7 +64,7 @@ class ArgValueParser(object):
         return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))

     @classmethod
-    def split_strs_to_list(cls, values: Union[
+    def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
                            func: Callable[[str], Any] = None,
                            sep=',', do_trim=True, ignore_blank=True) -> Optional[List[Any]]:
         """
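
For reference, a minimal usage sketch of the widened signatures above, assuming the helpers split each string (or each element of a sequence) on `sep` and apply `func` to every non-blank piece, as the docstring and the `yield func(x) if func else x` line suggest; the sample values are illustrative:

from feilian import ArgValueParser

# a single comma-separated string is split, trimmed, and parsed piece by piece
ports = ArgValueParser.split_strs_to_list("8080, 8081, ,8082", func=int)
print(ports)   # expected: [8080, 8081, 8082] (blank piece dropped when ignore_blank=True)

# after this change a sequence of strings is also accepted
names = ArgValueParser.split_strs_to_set(["a,b", "c"])
print(names)   # expected: {'a', 'b', 'c'}
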
feilian/dataframe.py
CHANGED
@@ -22,8 +22,11 @@ pd_version = [int(x) for x in pd.__version__.split('.')]
 if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
     PD_PARAM_NEWLINE = 'line_terminator'

+FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
+COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
+
 def read_dataframe(file: str, *args, sheet_name=0,
-                   file_format:
+                   file_format: FILE_FORMAT = None,
                    jsonl=False, dtype: type = None,
                    **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
     """
@@ -31,7 +34,7 @@ def read_dataframe(file: str, *args, sheet_name=0,
     :param file: the file to be read
     :param args: extra args for `pd.read_xx()`
     :param sheet_name: `sheet_name` for `pd.read_excel()`
-    :param file_format: csv, tsv, json ,xlsx
+    :param file_format: csv, tsv, json ,xlsx, parquet
     :param jsonl: jsonl format or not, only used in json format
     :param dtype: `dtype` for `pd.read_xx()`
     :param kwargs: extra kwargs for `pd.read_xx()`
@@ -46,12 +49,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         if key in kwargs and kwargs.pop(key):
             jsonl = True

-    #
+    # handle special formats
     if file_format == 'tsv':
+        # if the file format is tsv, actually same as csv
         file_format = 'csv'
         if 'sep' in kwargs:
             kwargs.pop('sep')
         kwargs['delimiter'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True

     if file_format == 'csv':
         return pd.read_csv(file, *args, dtype=dtype, **kwargs)
@@ -59,13 +66,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         return pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
     elif file_format == 'json':
         return pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+    elif file_format == 'parquet':
+        return pd.read_parquet(file, *args, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")

 def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]'],
                    df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
                    *args, sheet_name='Sheet1',
-                   file_format:
+                   file_format: FILE_FORMAT = None,
+                   compression: COMPRESSION_FORMAT = None,
                    index=False, index_label=None,
                    encoding='utf-8', newline='\n',
                    force_ascii=False,
@@ -80,7 +90,9 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     :param df: the data
     :param args: extra args for df.to_xx()
     :param sheet_name: `sheet_name` for excel format
-    :param file_format: csv, tsv, json, xlsx
+    :param file_format: csv, tsv, json, xlsx, parquet
+    :param compression: name of the compression to use.
+        use `None` for no compression.
     :param index: save index or not, see docs in df.to_csv();
         if set as str and `index_label` not set, `index_label` will be set as this
     :param index_label: header for the index when `index` is `True`
@@ -127,24 +139,31 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     if index_label is None and isinstance(index, str):
         index, index_label = True, index

-    #
+    # handle special formats
     if file_format == 'tsv':
+        # tsv is actually a csv
         file_format = 'csv'
         kwargs['sep'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True

     # save to file for different format
     if file_format == 'csv':
         kwargs[PD_PARAM_NEWLINE] = newline
-        df.to_csv(file, *args, index=index, index_label=index_label,
+        df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
+                  encoding=encoding, **kwargs)
     elif file_format == 'xlsx':
         df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
     elif file_format == 'json':
         if jsonl:
             orient = 'records'
             index = True
-        df.to_json(file, *args,
-                   orient=orient, lines=jsonl,
+        df.to_json(file, *args, compression=compression, index=index,
+                   force_ascii=force_ascii, orient=orient, lines=jsonl,
                    **kwargs)
+    elif file_format == 'parquet':
+        df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")

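
A minimal sketch of the new parquet and compression support, using only the parameters visible in this diff; file names and data are illustrative, and parquet I/O additionally requires a pandas parquet engine such as pyarrow or fastparquet:

import pandas as pd
from feilian.dataframe import read_dataframe, save_dataframe

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

# new in 1.1.9: parquet as a file_format, with compression passed through to pandas
save_dataframe("data.parquet", df, file_format="parquet", compression="snappy")
df_back = read_dataframe("data.parquet", file_format="parquet")

# compression is also forwarded for csv/json output
save_dataframe("data.csv.gz", df, file_format="csv", compression="gzip")

# 'jsonl' is now accepted as shorthand for json with one record per line
save_dataframe("data.jsonl", df, file_format="jsonl")
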
feilian/json.py
CHANGED
@@ -4,18 +4,29 @@ from typing import Dict, List, Union, Any
 import json
 from .io import ensure_parent_dir_exist

-def read_json(filepath: str, encoding='utf-8', **kwargs):
+def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
     """
     An agent for `json.load()` with some default value.
     """
     with open(filepath, encoding=encoding) as f:
-
+        if jsonl:
+            return [json.loads(x) for x in f]
+        else:
+            return json.load(f, **kwargs)

-def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]],
+def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False,
               encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
     """
     An agent for `json.dump()` with some default value.
     """
+    if jsonl and not isinstance(data, list):
+        # data should be a list
+        raise ValueError("data should be a list when save as jsonl format")
     ensure_parent_dir_exist(filepath)
     with open(filepath, 'w', encoding=encoding, newline=newline) as f:
-
+        if jsonl:
+            for x in data:
+                f.write(json.dumps(x, ensure_ascii=ensure_ascii, **kwargs))
+                f.write(newline)
+        else:
+            json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)

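
A minimal round-trip sketch of the new `jsonl` flag, based only on the behaviour shown above; the path and records are illustrative:

from feilian import read_json, save_json

records = [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]

# jsonl=True writes one JSON object per line
# (and raises ValueError if `data` is not a list)
save_json("records.jsonl", records, jsonl=True)

# jsonl=True reads the file back line by line with json.loads()
assert read_json("records.jsonl", jsonl=True) == records
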
feilian/utils.py
ADDED
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from typing import Dict, Any, Union, Collection
+
+def flatten_dict(data: Dict[str, Any], prefix="", joiner=".",
+                 exclude: Union[None, str, Collection[str]] = None,
+                 frozen: Union[None, str, Collection[str]] = None,
+                 empty_as_default=False, empty_value=None,
+                 res: Dict[str, Any] = None) -> Dict[str, Any]:
+    """
+    flatten dict as a flat one layer dict
+    :param data: origin dict
+    :param prefix: prefix for key in the dict
+    :param joiner: join symbol for different layer key
+    :param exclude: prefix to be excluded from result
+    :param frozen: keys not to be flattened
+    :param empty_as_default: should set a default value if value is an empty dict
+    :param empty_value: if `empty_as_default` is `True`, used as the default value for empty dict
+    :param res: the result flat layer dict, create a new one if not given.
+    """
+    if res is None:
+        res = {}
+    if isinstance(exclude, str):
+        exclude = {exclude}
+    if isinstance(frozen, str):
+        frozen = {frozen}
+
+    # all keys are start with the prefix, ignore data
+    if exclude and prefix in exclude:
+        return res
+
+    # all keys in data should be frozen
+    if frozen and prefix in frozen:
+        for k, v in data.items():
+            res[prefix+k] = v
+        return res
+
+    for k, v in data.items():
+        k = prefix + k
+
+        if exclude and k in exclude:
+            # only the key should be excluded
+            continue
+
+        if frozen and k in frozen:
+            # frozen key, keep it as original value
+            res[k] = v
+            continue
+
+        if isinstance(v, dict):
+            if len(v) == 0:
+                # empty dict, set as default value if set
+                if empty_as_default:
+                    res[k] = empty_value
+            else:
+                # value is a dict, flatten recursively
+                flatten_dict(v, prefix=k+joiner, joiner=joiner, exclude=exclude, frozen=frozen, res=res)
+        else:
+            # normal value, keep it as original value
+            res[k] = v
+
+    return res
+
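
A short sketch of the new helper, following the logic above; the nested dict is illustrative:

from feilian import flatten_dict

nested = {"a": 1, "b": {"c": 2, "d": {"e": 3}}, "empty": {}}

flat = flatten_dict(nested, empty_as_default=True, empty_value=None)
# expected: {'a': 1, 'b.c': 2, 'b.d.e': 3, 'empty': None}

# a key listed in `frozen` is kept with its original (nested) value
kept = flatten_dict(nested, frozen="b")
# expected: {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}
# ('empty' is omitted here because empty_as_default defaults to False)
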
feilian/version.py
CHANGED
@@ -3,7 +3,7 @@
 try:
     from ._dist_ver import VERSION, __version__
 except ImportError:
-    from
+    from importlib_metadata import version, PackageNotFoundError
     try:
         __version__ = version('feilian')
     except PackageNotFoundError:

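For context, with this change the version is read from the generated `_dist_ver.py` when present and otherwise resolved via `importlib_metadata` for an installed package; a trivial check (illustrative):

import feilian

# falls back to importlib_metadata.version('feilian') when
# feilian/_dist_ver.py is not shipped
print(feilian.__version__)   # e.g. '1.1.9'
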
feilian-1.1.9.dist-info/RECORD
ADDED

@@ -0,0 +1,14 @@
+feilian/__init__.py,sha256=Flac-edWcAozHlL4aAdSJX0rphgSdplFrUUlkr5q7ro,815
+feilian/_dist_ver.py,sha256=gRZX3slfIQ_HsndEWgYKrFuTPKqi9wcoGNPZZAaGPTI,148
+feilian/arg.py,sha256=n2nIcmC_3rb9A6BOzm9C5z3-T4lnubaGzH2sFhtqwZQ,8402
+feilian/dataframe.py,sha256=G7Ai_JsMS7kNfqRNptqGOOxGWjyYwcJrT73IttDO1vo,10653
+feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
+feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
+feilian/json.py,sha256=PSjDJ3MCdolKwfAOmT9DuS8KnJZo9oGABKgJDduCliU,1187
+feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
+feilian/utils.py,sha256=DqBKjpRBbSNipRDau9sYnoCfSDR_Of-xTOCoNxGWUJk,2180
+feilian/version.py,sha256=oH_DvE7jRCWlCCX9SSadwxwRJXFas_rIisYLBGPYZn4,350
+feilian-1.1.9.dist-info/METADATA,sha256=KA03FrgOCulwqzLrodlh76uKxVnISbxpchq8GGDQfvw,902
+feilian-1.1.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+feilian-1.1.9.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
+feilian-1.1.9.dist-info/RECORD,,

feilian-1.1.7.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-feilian/__init__.py,sha256=D_OYSAfDALQnRsbaAXMnHyC0LtykaIXLTHq7XNcsYjU,763
-feilian/_dist_ver.py,sha256=J13_krnbZzV1wGrNVLJT8EIwC3Tx29u3m5Bykt3ljFc,148
-feilian/arg.py,sha256=lGZ99RkU9uhE_ziaFK2SClS3vrygteNNxTm3i_68SWw,8359
-feilian/dataframe.py,sha256=ac-8RqE2YTFmE6Gk-BdqMRAC-VqSYfyC1y1gwJe3fWQ,9838
-feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
-feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
-feilian/json.py,sha256=1GsnL-CASi4xBaycMN-Tw1ytxty7GeL2wmt7nfLfnB4,754
-feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
-feilian/version.py,sha256=rwuYOfaG8mzghVTba-c5zBWNrj-wZaE8iO7fX4tcGUQ,350
-feilian-1.1.7.dist-info/METADATA,sha256=Yolz6Nlb4bd_eTpYi2BzavCnzs6OWefQWdnv5LKiR2E,902
-feilian-1.1.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-feilian-1.1.7.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
-feilian-1.1.7.dist-info/RECORD,,

{feilian-1.1.7.dist-info → feilian-1.1.9.dist-info}/top_level.txt
File without changes